gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2021 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus ([email protected]).
  23    x86_64 support by Jan Hubicka ([email protected])
  24    VIA PadLock support by Michal Ludvig ([email protected])
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "elf/x86-64.h"
  34 #include "opcodes/i386-init.h"
  35 #include <limits.h>
  36
  37 #ifndef INFER_ADDR_PREFIX
  38 #define INFER_ADDR_PREFIX 1
  39 #endif
  40
  41 #ifndef DEFAULT_ARCH
  42 #define DEFAULT_ARCH "i386"
  43 #endif
  44
  45 #ifndef INLINE
  46 #if __GNUC__ >= 2
  47 #define INLINE __inline__
  48 #else
  49 #define INLINE
  50 #endif
  51 #endif
  52
  53 /* Prefixes will be emitted in the order defined below.
  54    WAIT_PREFIX must be the first prefix since FWAIT really is an
  55    instruction, and so must come before any prefixes.
  56    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  57    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  58 #define WAIT_PREFIX     0
  59 #define SEG_PREFIX      1
  60 #define ADDR_PREFIX     2
  61 #define DATA_PREFIX     3
  62 #define REP_PREFIX      4
  63 #define HLE_PREFIX      REP_PREFIX
  64 #define BND_PREFIX      REP_PREFIX
  65 #define LOCK_PREFIX     5
  66 #define REX_PREFIX      6       /* must come last.  */
  67 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  68
  69 /* we define the syntax here (modulo base,index,scale syntax) */
  70 #define REGISTER_PREFIX '%'
  71 #define IMMEDIATE_PREFIX '$'
  72 #define ABSOLUTE_PREFIX '*'
  73
  74 /* these are the instruction mnemonic suffixes in AT&T syntax or
  75    memory operand size in Intel syntax.  */
  76 #define WORD_MNEM_SUFFIX  'w'
  77 #define BYTE_MNEM_SUFFIX  'b'
  78 #define SHORT_MNEM_SUFFIX 's'
  79 #define LONG_MNEM_SUFFIX  'l'
  80 #define QWORD_MNEM_SUFFIX  'q'
  81 /* Intel Syntax.  Use a non-letter control character since it never appears
  82    in instructions.  */
  83 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  84
  85 #define END_OF_INSN '\0'
  86
  87 /* This matches the C -> StaticRounding alias in the opcode table.  */
  88 #define commutative staticrounding
  89
  90 /*
  91   'templates' is for grouping together 'template' structures for opcodes
  92   of the same name.  This is only used for storing the insns in the grand
  93   ole hash table of insns.
  94   The templates themselves start at START and range up to (but not including)
  95   END.
  96   */
  97 typedef struct
  98 {
  99   const insn_template *start;
 100   const insn_template *end;
 101 }
 102 templates;
 103
 104 /* 386 operand encoding bytes:  see 386 book for details of this.  */
 105 typedef struct
 106 {
 107   unsigned int regmem;  /* codes register or memory operand */
 108   unsigned int reg;     /* codes register operand (or extended opcode) */
 109   unsigned int mode;    /* how to interpret regmem & reg */
 110 }
 111 modrm_byte;
 112
 113 /* x86-64 extension prefix.  */
 114 typedef int rex_byte;
 115
 116 /* 386 opcode byte to code indirect addressing.  */
 117 typedef struct
 118 {
 119   unsigned base;
 120   unsigned index;
 121   unsigned scale;
 122 }
 123 sib_byte;
 124
 125 /* x86 arch names, types and features */
 126 typedef struct
 127 {
 128   const char *name;             /* arch name */
 129   unsigned int len;             /* arch string length */
 130   enum processor_type type;     /* arch type */
 131   i386_cpu_flags flags;         /* cpu feature flags */
 132   unsigned int skip;            /* show_arch should skip this. */
 133 }
 134 arch_entry;
 135
 136 /* Used to turn off indicated flags.  */
 137 typedef struct
 138 {
 139   const char *name;             /* arch name */
 140   unsigned int len;             /* arch string length */
 141   i386_cpu_flags flags;         /* cpu feature flags */
 142 }
 143 noarch_entry;
 144
 145 static void update_code_flag (int, int);
 146 static void set_code_flag (int);
 147 static void set_16bit_gcc_code_flag (int);
 148 static void set_intel_syntax (int);
 149 static void set_intel_mnemonic (int);
 150 static void set_allow_index_reg (int);
 151 static void set_check (int);
 152 static void set_cpu_arch (int);
 153 #ifdef TE_PE
 154 static void pe_directive_secrel (int);
 155 #endif
 156 static void signed_cons (int);
 157 static char *output_invalid (int c);
 158 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 159                                     const char *);
 160 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 161                                        const char *);
 162 static int i386_att_operand (char *);
 163 static int i386_intel_operand (char *, int);
 164 static int i386_intel_simplify (expressionS *);
 165 static int i386_intel_parse_name (const char *, expressionS *);
 166 static const reg_entry *parse_register (char *, char **);
 167 static char *parse_insn (char *, char *);
 168 static char *parse_operands (char *, const char *);
 169 static void swap_operands (void);
 170 static void swap_2_operands (unsigned int, unsigned int);
 171 static enum flag_code i386_addressing_mode (void);
 172 static void optimize_imm (void);
 173 static void optimize_disp (void);
 174 static const insn_template *match_template (char);
 175 static int check_string (void);
 176 static int process_suffix (void);
 177 static int check_byte_reg (void);
 178 static int check_long_reg (void);
 179 static int check_qword_reg (void);
 180 static int check_word_reg (void);
 181 static int finalize_imm (void);
 182 static int process_operands (void);
 183 static const reg_entry *build_modrm_byte (void);
 184 static void output_insn (void);
 185 static void output_imm (fragS *, offsetT);
 186 static void output_disp (fragS *, offsetT);
 187 #ifndef I386COFF
 188 static void s_bss (int);
 189 #endif
 190 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 191 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 192
 193 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 194 static unsigned int x86_isa_1_used;
 195 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 196 static unsigned int x86_feature_2_used;
 197 /* Generate x86 used ISA and feature properties.  */
 198 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 199 #endif
 200
 201 static const char *default_arch = DEFAULT_ARCH;
 202
 203 /* parse_register() returns this when a register alias cannot be used.  */
 204 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 205                                    { Dw2Inval, Dw2Inval } };
 206
 207 static const reg_entry *reg_eax;
 208 static const reg_entry *reg_ds;
 209 static const reg_entry *reg_es;
 210 static const reg_entry *reg_ss;
 211 static const reg_entry *reg_st0;
 212 static const reg_entry *reg_k0;
 213
 214 /* VEX prefix.  */
 215 typedef struct
 216 {
 217   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 218   unsigned char bytes[4];
 219   unsigned int length;
 220   /* Destination or source register specifier.  */
 221   const reg_entry *register_specifier;
 222 } vex_prefix;
 223
 224 /* 'md_assemble ()' gathers together information and puts it into a
 225    i386_insn.  */
 226
 227 union i386_op  /* Per-operand payload; i386_insn.types[] tells how to use it.  */
 228   {
 229     expressionS *disps;      /* displacement expression */
 230     expressionS *imms;       /* immediate expression */
 231     const reg_entry *regs;   /* register */
 232   };
 233
 234 enum i386_error
 235   {
 236     operand_size_mismatch,
 237     operand_type_mismatch,
 238     register_type_mismatch,
 239     number_of_operands_mismatch,
 240     invalid_instruction_suffix,
 241     bad_imm4,
 242     unsupported_with_intel_mnemonic,
 243     unsupported_syntax,
 244     unsupported,
 245     invalid_sib_address,
 246     invalid_vsib_address,
 247     invalid_vector_register_set,
 248     invalid_tmm_register_set,
 249     unsupported_vector_index_register,
 250     unsupported_broadcast,
 251     broadcast_needed,
 252     unsupported_masking,
 253     mask_not_on_destination,
 254     no_default_mask,
 255     unsupported_rc_sae,
 256     rc_sae_operand_not_last_imm,
 257     invalid_register_operand,
 258   };
 259
 260 struct _i386_insn
 261   {
 262     /* TM holds the template for the insn we're currently assembling.  */
 263     insn_template tm;
 264
 265     /* SUFFIX holds the instruction size suffix for byte, word, dword
 266        or qword, if given.  */
 267     char suffix;
 268
 269     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 270     unsigned char opcode_length;
 271
 272     /* OPERANDS gives the number of given operands.  */
 273     unsigned int operands;
 274
 275     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 276        of given register, displacement, memory operands and immediate
 277        operands.  */
 278     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 279
 280     /* TYPES [i] is the type (see above #defines) which tells us how to
 281        use OP[i] for the corresponding operand.  */
 282     i386_operand_type types[MAX_OPERANDS];
 283
 284     /* Displacement expression, immediate expression, or register for each
 285        operand.  */
 286     union i386_op op[MAX_OPERANDS];
 287
 288     /* Flags for operands (a combination of the Operand_* bits below).  */
 289     unsigned int flags[MAX_OPERANDS];
 290 #define Operand_PCrel 1
 291 #define Operand_Mem   2
 292
 293     /* Relocation type for each operand.  */
 294     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 295
 296     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 297        the base index byte below.  */
 298     const reg_entry *base_reg;
 299     const reg_entry *index_reg;
 300     unsigned int log2_scale_factor;
 301
 302     /* SEG gives the seg_entries of this insn.  They are zero unless
 303        explicit segment overrides are given.  */
 304     const reg_entry *seg[2];
 305
 306     /* Copied first memory operand string, for re-checking.  */
 307     char *memop1_string;
 308
 309     /* PREFIX holds all the given prefix opcodes (usually null).
 310        PREFIXES is the number of prefix opcodes.  */
 311     unsigned int prefixes;
 312     unsigned char prefix[MAX_PREFIXES];
 313
 314     /* Register is in low 3 bits of opcode.  */
 315     bool short_form;
 316
 317     /* The operand to a branch insn indicates an absolute branch.  */
 318     bool jumpabsolute;
 319
 320     /* Extended states.  */
 321     enum
 322       {
 323         /* Use MMX state.  */
 324         xstate_mmx = 1 << 0,
 325         /* Use XMM state.  */
 326         xstate_xmm = 1 << 1,
 327         /* Use YMM state (also sets the XMM bit).  */
 328         xstate_ymm = 1 << 2 | xstate_xmm,
 329         /* Use ZMM state (also sets the YMM and XMM bits).  */
 330         xstate_zmm = 1 << 3 | xstate_ymm,
 331         /* Use TMM state.  */
 332         xstate_tmm = 1 << 4,
 333         /* Use MASK state.  */
 334         xstate_mask = 1 << 5
 335       } xstate;
 336
 337     /* Has GOTPC or TLS relocation.  */
 338     bool has_gotpc_tls_reloc;
 339
 340     /* RM and SIB are the modrm byte and the sib byte where the
 341        addressing modes of this insn are encoded.  */
 342     modrm_byte rm;
 343     rex_byte rex;  /* x86-64 extension prefix */
 344     rex_byte vrex;
 345     sib_byte sib;
 346     vex_prefix vex;  /* VEX prefix */
 347
 348     /* Masking attributes.
 349
 350        The struct describes masking, applied to OPERAND in the instruction.
 351        REG is a pointer to the corresponding mask register.  ZEROING tells
 352        whether merging or zeroing mask is used.  */
 353     struct Mask_Operation
 354     {
 355       const reg_entry *reg;
 356       unsigned int zeroing;
 357       /* The operand with which this operation is associated.  */
 358       unsigned int operand;
 359     } mask;
 360
 361     /* Rounding control and SAE attributes.  */
 362     struct RC_Operation
 363     {
 364       enum rc_type
 365         {
 366           rc_none = -1,
 367           rne,
 368           rd,
 369           ru,
 370           rz,
 371           saeonly
 372         } type;
 373
 374       unsigned int operand;
 375     } rounding;
 376
 377     /* Broadcasting attributes.
 378
 379        The struct describes broadcasting, applied to OPERAND.  TYPE
 380        expresses the broadcast factor.  */
 381     struct Broadcast_Operation
 382     {
 383       /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}.  */
 384       unsigned int type;
 385
 386       /* Index of broadcasted operand.  */
 387       unsigned int operand;
 388
 389       /* Number of bytes to broadcast.  */
 390       unsigned int bytes;
 391     } broadcast;
 392
 393     /* Compressed disp8*N attribute.  */
 394     unsigned int memshift;
 395
 396     /* Prefer load or store in encoding.  */
 397     enum
 398       {
 399         dir_encoding_default = 0,
 400         dir_encoding_load,
 401         dir_encoding_store,
 402         dir_encoding_swap
 403       } dir_encoding;
 404
 405     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 406     enum
 407       {
 408         disp_encoding_default = 0,
 409         disp_encoding_8bit,
 410         disp_encoding_16bit,
 411         disp_encoding_32bit
 412       } disp_encoding;
 413
 414     /* Prefer the REX byte in encoding.  */
 415     bool rex_encoding;
 416
 417     /* Disable instruction size optimization.  */
 418     bool no_optimize;
 419
 420     /* How to encode vector instructions.  */
 421     enum
 422       {
 423         vex_encoding_default = 0,
 424         vex_encoding_vex,
 425         vex_encoding_vex3,
 426         vex_encoding_evex,
 427         vex_encoding_error
 428       } vec_encoding;
 429
 430     /* REP prefix.  */
 431     const char *rep_prefix;
 432
 433     /* HLE prefix.  */
 434     const char *hle_prefix;
 435
 436     /* Have BND prefix.  */
 437     const char *bnd_prefix;
 438
 439     /* Have NOTRACK prefix.  */
 440     const char *notrack_prefix;
 441
 442     /* Error code identifying the error message (see enum i386_error).  */
 443     enum i386_error error;
 444   };
 445
 446 typedef struct _i386_insn i386_insn;
 447
 448 /* Link RC type with corresponding string, that'll be looked for in
 449    asm.  */
 450 struct RC_name
 451 {
 452   enum rc_type type;
 453   const char *name;
 454   unsigned int len;
 455 };
 456
 457 static const struct RC_name RC_NamesTable[] =
 458 {
 459   {  rne, STRING_COMMA_LEN ("rn-sae") },
 460   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 461   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 462   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 463   {  saeonly,  STRING_COMMA_LEN ("sae") },
 464 };
 465
 466 /* List of chars besides those in app.c:symbol_chars that can start an
 467    operand.  Used to prevent the scrubber eating vital white-space.  */
 468 const char extra_symbol_chars[] = "*%-([{}"
 469 #ifdef LEX_AT
 470         "@"
 471 #endif
 472 #ifdef LEX_QM
 473         "?"
 474 #endif
 475         ;
 476
 477 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 478      && !defined (TE_GNU)                               \
 479      && !defined (TE_LINUX)                             \
 480      && !defined (TE_FreeBSD)                           \
 481      && !defined (TE_DragonFly)                         \
 482      && !defined (TE_NetBSD))
 483 /* This array holds the chars that always start a comment.  If the
 484    pre-processor is disabled, these aren't very useful.  The option
 485    --divide will remove '/' from this list.  */
 486 const char *i386_comment_chars = "#/";
 487 #define SVR4_COMMENT_CHARS 1
 488 #define PREFIX_SEPARATOR '\\'
 489
 490 #else
 491 const char *i386_comment_chars = "#";
 492 #define PREFIX_SEPARATOR '/'
 493 #endif
 494
 495 /* This array holds the chars that only start a comment at the beginning of
 496    a line.  If the line seems to have the form '# 123 filename'
 497    .line and .file directives will appear in the pre-processed output.
 498    Note that input_file.c hand checks for '#' at the beginning of the
 499    first line of the input file.  This is because the compiler outputs
 500    #NO_APP at the beginning of its output.
 501    Also note that comments started like this one will always work if
 502    '/' isn't otherwise defined.  */
 503 const char line_comment_chars[] = "#/";
 504
 505 const char line_separator_chars[] = ";";
 506
 507 /* Chars that can be used to separate mant from exp in floating point
 508    nums.  */
 509 const char EXP_CHARS[] = "eE";
 510
 511 /* Chars that mean this number is a floating point constant
 512    As in 0f12.456
 513    or    0d1.2345e12.  */
 514 const char FLT_CHARS[] = "fFdDxX";
 515
 516 /* Tables for lexical analysis.  */
 517 static char mnemonic_chars[256];
 518 static char register_chars[256];
 519 static char operand_chars[256];
 520 static char identifier_chars[256];
 521
 522 /* Lexical macros.  Arguments are parenthesized so a compound expression is cast as a whole.  */
 523 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) (x)])
 524 #define is_operand_char(x) (operand_chars[(unsigned char) (x)])
 525 #define is_register_char(x) (register_chars[(unsigned char) (x)])
 526 #define is_space_char(x) ((x) == ' ')
 527 #define is_identifier_char(x) (identifier_chars[(unsigned char) (x)])
 528
 529 /* All non-digit non-letter characters that may occur in an operand.  */
 530 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 531
 532 /* md_assemble() always leaves the strings it's passed unaltered.  To
 533    effect this we maintain a stack of saved characters that we've smashed
 534    with '\0's (indicating end of strings for various sub-fields of the
 535    assembler instruction).  */
 536 static char save_stack[32];
 537 static char *save_stack_p;
 538 #define END_STRING_AND_SAVE(s) \
 539         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 540 #define RESTORE_END_STRING(s) \
 541         do { *(s) = *--save_stack_p; } while (0)
 542
 543 /* The instruction we're assembling.  */
 544 static i386_insn i;
 545
 546 /* Possible templates for current insn.  */
 547 static const templates *current_templates;
 548
 549 /* Per instruction expressionS buffers: max displacements & immediates.  */
 550 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 551 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 552
 553 /* Current operand we are working on.  */
 554 static int this_operand = -1;
 555
 556 /* We support four different modes.  FLAG_CODE variable is used to distinguish
 557    these.  */
 558
 559 enum flag_code {
 560         CODE_32BIT,
 561         CODE_16BIT,
 562         CODE_64BIT };
 563
 564 static enum flag_code flag_code;
 565 static unsigned int object_64bit;
 566 static unsigned int disallow_64bit_reloc;
 567 static int use_rela_relocations = 0;
 568 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 569 static const char *tls_get_addr;
 570
 571 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 572      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 573      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 574
 575 /* The ELF ABI to use.  */
 576 enum x86_elf_abi
 577 {
 578   I386_ABI,
 579   X86_64_ABI,
 580   X86_64_X32_ABI
 581 };
 582
 583 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 584 #endif
 585
 586 #if defined (TE_PE) || defined (TE_PEP)
 587 /* Use big object file format.  */
 588 static int use_big_obj = 0;
 589 #endif
 590
 591 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 592 /* 1 if generating code for a shared library.  */
 593 static int shared = 0;
 594 #endif
 595
 596 /* 1 for intel syntax,
 597    0 if att syntax.  */
 598 static int intel_syntax = 0;
 599
 600 static enum x86_64_isa
 601 {
 602   amd64 = 1,    /* AMD64 ISA.  */
 603   intel64       /* Intel64 ISA.  */
 604 } isa64;
 605
 606 /* 1 for intel mnemonic,
 607    0 if att mnemonic.  */
 608 static int intel_mnemonic = !SYSV386_COMPAT;
 609
 610 /* 1 if pseudo registers are permitted.  */
 611 static int allow_pseudo_reg = 0;
 612
 613 /* 1 if register prefix % not required.  */
 614 static int allow_naked_reg = 0;
 615
 616 /* 1 if the assembler should add BND prefix for all control-transferring
 617    instructions supporting it, even if this prefix wasn't specified
 618    explicitly.  */
 619 static int add_bnd_prefix = 0;
 620
 621 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 622 static int allow_index_reg = 0;
 623
 624 /* 1 if the assembler should ignore LOCK prefix, even if it was
 625    specified explicitly.  */
 626 static int omit_lock_prefix = 0;
 627
 628 /* 1 if the assembler should encode lfence, mfence, and sfence as
 629    "lock addl $0, (%{re}sp)".  */
 630 static int avoid_fence = 0;
 631
 632 /* 1 if lfence should be inserted after every load.  */
 633 static int lfence_after_load = 0;
 634
 635 /* Non-zero if lfence should be inserted before indirect branch.  */
 636 static enum lfence_before_indirect_branch_kind
 637   {
 638     lfence_branch_none = 0,
 639     lfence_branch_register,
 640     lfence_branch_memory,
 641     lfence_branch_all
 642   }
 643 lfence_before_indirect_branch;
 644
 645 /* Non-zero if lfence should be inserted before ret.  */
 646 static enum lfence_before_ret_kind
 647   {
 648     lfence_before_ret_none = 0,
 649     lfence_before_ret_not,
 650     lfence_before_ret_or,
 651     lfence_before_ret_shl
 652   }
 653 lfence_before_ret;
 654
 655 /* Records whether the previous instruction was a directive such as .byte or a prefix.  */
 656 static struct
 657   {
 658     segT seg;
 659     const char *file;
 660     const char *name;
 661     unsigned int line;
 662     enum last_insn_kind
 663       {
 664         last_insn_other = 0,
 665         last_insn_directive,
 666         last_insn_prefix
 667       } kind;
 668   } last_insn;
 669
 670 /* 1 if the assembler should generate relax relocations.  */
 671
 672 static int generate_relax_relocations
 673   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 674
 675 static enum check_kind
 676   {
 677     check_none = 0,
 678     check_warning,
 679     check_error
 680   }
 681 sse_check, operand_check = check_warning;
 682
 683 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 684 static int align_branch_power = 0;
 685
 686 /* Types of branches to align.  */
 687 enum align_branch_kind
 688   {
 689     align_branch_none = 0,
 690     align_branch_jcc = 1,
 691     align_branch_fused = 2,
 692     align_branch_jmp = 3,
 693     align_branch_call = 4,
 694     align_branch_indirect = 5,
 695     align_branch_ret = 6
 696   };
 697
 698 /* Type bits of branches to align.  */
 699 enum align_branch_bit
 700   {
 701     align_branch_jcc_bit = 1 << align_branch_jcc,
 702     align_branch_fused_bit = 1 << align_branch_fused,
 703     align_branch_jmp_bit = 1 << align_branch_jmp,
 704     align_branch_call_bit = 1 << align_branch_call,
 705     align_branch_indirect_bit = 1 << align_branch_indirect,
 706     align_branch_ret_bit = 1 << align_branch_ret
 707   };
 708
 709 static unsigned int align_branch = (align_branch_jcc_bit
 710                                     | align_branch_fused_bit
 711                                     | align_branch_jmp_bit);
 712
 713 /* Types of condition jump used by macro-fusion.  */
 714 enum mf_jcc_kind
 715   {
 716     mf_jcc_jo = 0,  /* base opcode 0x70  */
 717     mf_jcc_jc,      /* base opcode 0x72  */
 718     mf_jcc_je,      /* base opcode 0x74  */
 719     mf_jcc_jna,     /* base opcode 0x76  */
 720     mf_jcc_js,      /* base opcode 0x78  */
 721     mf_jcc_jp,      /* base opcode 0x7a  */
 722     mf_jcc_jl,      /* base opcode 0x7c  */
 723     mf_jcc_jle,     /* base opcode 0x7e  */
 724   };
 725
 726 /* Types of compare flag-modifying instructions used by macro-fusion.  */
 727 enum mf_cmp_kind
 728   {
 729     mf_cmp_test_and,  /* test/and */
 730     mf_cmp_alu_cmp,  /* add/sub/cmp */
 731     mf_cmp_incdec  /* inc/dec */
 732   };
 733
 734 /* The maximum padding size for fused jcc.  CMP like instruction can
 735    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 736    prefixes.   */
 737 #define MAX_FUSED_JCC_PADDING_SIZE 20
 738
 739 /* The maximum number of prefixes added for an instruction.  */
 740 static unsigned int align_branch_prefix_size = 5;
 741
 742 /* Optimization:
 743    1. Clear the REX_W bit with register operand if possible.
 744    2. Above plus use 128bit vector instruction to clear the full vector
 745       register.
 746  */
 747 static int optimize = 0;
 748
 749 /* Optimization:
 750    1. Clear the REX_W bit with register operand if possible.
 751    2. Above plus use 128bit vector instruction to clear the full vector
 752       register.
 753    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 754       "testb $imm7,%r8".
 755  */
 756 static int optimize_for_space = 0;
 757
 758 /* Register prefix used for error message.  */
 759 static const char *register_prefix = "%";
 760
 761 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 762    leave, push, and pop instructions so that gcc has the same stack
 763    frame as in 32 bit mode.  */
 764 static char stackop_size = '\0';
 765
 766 /* Non-zero to optimize code alignment.  */
 767 int optimize_align_code = 1;
 768
 769 /* Non-zero to quieten some warnings.  */
 770 static int quiet_warnings = 0;
 771
 772 /* CPU name.  */
 773 static const char *cpu_arch_name = NULL;
 774 static char *cpu_sub_arch_name = NULL;
 775
 776 /* CPU feature flags.  */
 777 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 778
 779 /* If we have selected a cpu we are generating instructions for.  */
 780 static int cpu_arch_tune_set = 0;
 781
 782 /* Cpu we are generating instructions for.  */
 783 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 784
 785 /* CPU feature flags of cpu we are generating instructions for.  */
 786 static i386_cpu_flags cpu_arch_tune_flags;
 787
 788 /* CPU instruction set architecture used.  */
 789 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 790
 791 /* CPU feature flags of instruction set architecture used.  */
 792 i386_cpu_flags cpu_arch_isa_flags;
 793
 794 /* If set, conditional jumps are not automatically promoted to handle
 795    larger than a byte offset.  */
 796 static unsigned int no_cond_jump_promotion = 0;
 797
 798 /* Encode SSE instructions with VEX prefix.  */
 799 static unsigned int sse2avx;
 800
 801 /* Encode scalar AVX instructions with specific vector length.  */
 802 static enum
 803   {
 804     vex128 = 0,
 805     vex256
 806   } avxscalar;
 807
 808 /* Encode VEX WIG instructions with specific vex.w.  */
 809 static enum
 810   {
 811     vexw0 = 0,
 812     vexw1
 813   } vexwig;
 814
 815 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 816 static enum
 817   {
 818     evexl128 = 0,
 819     evexl256,
 820     evexl512
 821   } evexlig;
 822
 823 /* Encode EVEX WIG instructions with specific evex.w.  */
 824 static enum
 825   {
 826     evexw0 = 0,
 827     evexw1
 828   } evexwig;
 829
 830 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 831 static enum rc_type evexrcig = rne;
 832
 833 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 834 static symbolS *GOT_symbol;
 835
 836 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 837 unsigned int x86_dwarf2_return_column;
 838
 839 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 840 int x86_cie_data_alignment;
 841
 842 /* Interface to relax_segment.
 843    There are 3 major relax states for 386 jump insns because the
 844    different types of jumps add different sizes to frags when we're
 845    figuring out what sort of jump to choose to reach a given label.
 846
 847    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 848    branches which are handled by md_estimate_size_before_relax() and
 849    i386_generic_table_relax_frag().  */
 850
 851 /* Types.  */
 852 #define UNCOND_JUMP 0
 853 #define COND_JUMP 1
 854 #define COND_JUMP86 2
 855 #define BRANCH_PADDING 3
 856 #define BRANCH_PREFIX 4
 857 #define FUSED_JCC_PADDING 5
 858
 859 /* Sizes.  */
 860 #define CODE16  1
 861 #define SMALL   0
 862 #define SMALL16 (SMALL | CODE16)
 863 #define BIG     2
 864 #define BIG16   (BIG | CODE16)
 865
 866 #ifndef INLINE
 867 #ifdef __GNUC__
 868 #define INLINE __inline__
 869 #else
 870 #define INLINE
 871 #endif
 872 #endif
 873
 874 #define ENCODE_RELAX_STATE(type, size) \
 875   ((relax_substateT) (((type) << 2) | (size)))  /* size in bits 0-1, type in the bits above */
 876 #define TYPE_FROM_RELAX_STATE(s) \
 877   ((s) >> 2)  /* drop the two size bits to recover the type */
 878 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 879     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))  /* 4 for BIG, 2 for BIG16, otherwise 1 */
 880
 881 /* This table is used by relax_frag to promote short jumps to long
 882    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 883    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 884    don't allow a short jump in a 32 bit code segment to be promoted to
 885    a 16 bit offset jump because it's slower (requires data size
 886    prefix), and doesn't work, unless the destination is in the bottom
 887    64k of the code segment (The top 16 bits of eip are zeroed).  */
 888
 889 const relax_typeS md_relax_table[] =
 890 {
 891   /* The fields are:
 892      1) most positive reach of this state,
 893      2) most negative reach of this state,
 894      3) how many bytes this mode will have in the variable part of the frag
 895      4) which index into the table to try if we can't fit into this one.  */
 896
 897   /* UNCOND_JUMP states (each group is ordered SMALL, SMALL16, BIG, BIG16).  */
 898   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 899   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 900   /* dword jmp adds 4 bytes to frag:
 901      0 extra opcode bytes, 4 displacement bytes.  */
 902   {0, 0, 4, 0},
 903   /* word jmp adds 2 bytes to frag:
 904      0 extra opcode bytes, 2 displacement bytes.  */
 905   {0, 0, 2, 0},
 906
 907   /* COND_JUMP states.  */
 908   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 909   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 910   /* dword conditionals add 5 bytes to frag:
 911      1 extra opcode byte, 4 displacement bytes.  */
 912   {0, 0, 5, 0},
 913   /* word conditionals add 3 bytes to frag:
 914      1 extra opcode byte, 2 displacement bytes.  */
 915   {0, 0, 3, 0},
 916
 917   /* COND_JUMP86 states.  */
 918   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 919   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 920   /* dword conditionals add 5 bytes to frag:
 921      1 extra opcode byte, 4 displacement bytes.  */
 922   {0, 0, 5, 0},
 923   /* word conditionals add 4 bytes to frag:
 924      1 displacement byte and a 3 byte long branch insn.  */
 925   {0, 0, 4, 0}
 926 };
 927
 928 static const arch_entry cpu_arch[] =
 929 {
       /* Each entry gives a name (via STRING_COMMA_LEN), a PROCESSOR_*
          value, a CPU_*_FLAGS initializer and a trailing 0/1 field whose
          meaning is defined by arch_entry (not visible in this part of
          the file).  The names without a leading '.' come first; every
          name that starts with '.' uses PROCESSOR_UNKNOWN and appears to
          name a single ISA extension rather than a processor.
          NOTE(review): ".sse4a" is listed twice below with identical
          contents -- confirm whether the second entry is intentional.  */
 930   /* Do not replace the first two entries - i386_target_format()
 931      relies on them being there in this order.  */
 932   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
 933     CPU_GENERIC32_FLAGS, 0 },
 934   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
 935     CPU_GENERIC64_FLAGS, 0 },
 936   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
 937     CPU_NONE_FLAGS, 0 },
 938   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
 939     CPU_I186_FLAGS, 0 },
 940   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
 941     CPU_I286_FLAGS, 0 },
 942   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
 943     CPU_I386_FLAGS, 0 },
 944   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
 945     CPU_I486_FLAGS, 0 },
 946   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
 947     CPU_I586_FLAGS, 0 },
 948   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
 949     CPU_I686_FLAGS, 0 },
 950   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
 951     CPU_I586_FLAGS, 0 },
 952   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
 953     CPU_PENTIUMPRO_FLAGS, 0 },
 954   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
 955     CPU_P2_FLAGS, 0 },
 956   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
 957     CPU_P3_FLAGS, 0 },
 958   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
 959     CPU_P4_FLAGS, 0 },
 960   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
 961     CPU_CORE_FLAGS, 0 },
 962   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
 963     CPU_NOCONA_FLAGS, 0 },
 964   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
 965     CPU_CORE_FLAGS, 1 },
 966   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
 967     CPU_CORE_FLAGS, 0 },
 968   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
 969     CPU_CORE2_FLAGS, 1 },
 970   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
 971     CPU_CORE2_FLAGS, 0 },
 972   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
 973     CPU_COREI7_FLAGS, 0 },
 974   { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
 975     CPU_L1OM_FLAGS, 0 },
 976   { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
 977     CPU_K1OM_FLAGS, 0 },
 978   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
 979     CPU_IAMCU_FLAGS, 0 },
 980   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
 981     CPU_K6_FLAGS, 0 },
 982   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
 983     CPU_K6_2_FLAGS, 0 },
 984   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
 985     CPU_ATHLON_FLAGS, 0 },
 986   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
 987     CPU_K8_FLAGS, 1 },
 988   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
 989     CPU_K8_FLAGS, 0 },
 990   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
 991     CPU_K8_FLAGS, 0 },
 992   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
 993     CPU_AMDFAM10_FLAGS, 0 },
 994   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
 995     CPU_BDVER1_FLAGS, 0 },
 996   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
 997     CPU_BDVER2_FLAGS, 0 },
 998   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
 999     CPU_BDVER3_FLAGS, 0 },
1000   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
1001     CPU_BDVER4_FLAGS, 0 },
1002   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
1003     CPU_ZNVER1_FLAGS, 0 },
1004   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
1005     CPU_ZNVER2_FLAGS, 0 },
1006   { STRING_COMMA_LEN ("znver3"), PROCESSOR_ZNVER,
1007     CPU_ZNVER3_FLAGS, 0 },
1008   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
1009     CPU_BTVER1_FLAGS, 0 },
1010   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
1011     CPU_BTVER2_FLAGS, 0 },
1012   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
1013     CPU_8087_FLAGS, 0 },
1014   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
1015     CPU_287_FLAGS, 0 },
1016   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
1017     CPU_387_FLAGS, 0 },
1018   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
1019     CPU_687_FLAGS, 0 },
1020   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
1021     CPU_CMOV_FLAGS, 0 },
1022   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
1023     CPU_FXSR_FLAGS, 0 },
1024   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1025     CPU_MMX_FLAGS, 0 },
1026   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1027     CPU_SSE_FLAGS, 0 },
1028   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1029     CPU_SSE2_FLAGS, 0 },
1030   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1031     CPU_SSE3_FLAGS, 0 },
1032   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1033     CPU_SSE4A_FLAGS, 0 },
1034   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1035     CPU_SSSE3_FLAGS, 0 },
1036   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1037     CPU_SSE4_1_FLAGS, 0 },
1038   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1039     CPU_SSE4_2_FLAGS, 0 },
       /* ".sse4" uses the same flag set as ".sse4.2".  */
1040   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1041     CPU_SSE4_2_FLAGS, 0 },
1042   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1043     CPU_AVX_FLAGS, 0 },
1044   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1045     CPU_AVX2_FLAGS, 0 },
1046   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1047     CPU_AVX512F_FLAGS, 0 },
1048   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1049     CPU_AVX512CD_FLAGS, 0 },
1050   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1051     CPU_AVX512ER_FLAGS, 0 },
1052   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1053     CPU_AVX512PF_FLAGS, 0 },
1054   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1055     CPU_AVX512DQ_FLAGS, 0 },
1056   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1057     CPU_AVX512BW_FLAGS, 0 },
1058   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1059     CPU_AVX512VL_FLAGS, 0 },
1060   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1061     CPU_VMX_FLAGS, 0 },
1062   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1063     CPU_VMFUNC_FLAGS, 0 },
1064   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1065     CPU_SMX_FLAGS, 0 },
1066   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1067     CPU_XSAVE_FLAGS, 0 },
1068   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1069     CPU_XSAVEOPT_FLAGS, 0 },
1070   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1071     CPU_XSAVEC_FLAGS, 0 },
1072   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1073     CPU_XSAVES_FLAGS, 0 },
1074   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1075     CPU_AES_FLAGS, 0 },
1076   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1077     CPU_PCLMUL_FLAGS, 0 },
1078   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1079     CPU_PCLMUL_FLAGS, 1 },
1080   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1081     CPU_FSGSBASE_FLAGS, 0 },
1082   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1083     CPU_RDRND_FLAGS, 0 },
1084   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1085     CPU_F16C_FLAGS, 0 },
1086   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1087     CPU_BMI2_FLAGS, 0 },
1088   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1089     CPU_FMA_FLAGS, 0 },
1090   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1091     CPU_FMA4_FLAGS, 0 },
1092   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1093     CPU_XOP_FLAGS, 0 },
1094   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1095     CPU_LWP_FLAGS, 0 },
1096   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1097     CPU_MOVBE_FLAGS, 0 },
1098   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1099     CPU_CX16_FLAGS, 0 },
1100   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1101     CPU_EPT_FLAGS, 0 },
1102   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1103     CPU_LZCNT_FLAGS, 0 },
1104   { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1105     CPU_POPCNT_FLAGS, 0 },
1106   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1107     CPU_HLE_FLAGS, 0 },
1108   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1109     CPU_RTM_FLAGS, 0 },
1110   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1111     CPU_INVPCID_FLAGS, 0 },
1112   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1113     CPU_CLFLUSH_FLAGS, 0 },
1114   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1115     CPU_NOP_FLAGS, 0 },
1116   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1117     CPU_SYSCALL_FLAGS, 0 },
1118   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1119     CPU_RDTSCP_FLAGS, 0 },
1120   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1121     CPU_3DNOW_FLAGS, 0 },
1122   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1123     CPU_3DNOWA_FLAGS, 0 },
1124   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1125     CPU_PADLOCK_FLAGS, 0 },
1126   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1127     CPU_SVME_FLAGS, 1 },
1128   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1129     CPU_SVME_FLAGS, 0 },
       /* NOTE(review): second ".sse4a" entry; the first one follows
          ".sse3" above.  */
1130   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1131     CPU_SSE4A_FLAGS, 0 },
1132   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1133     CPU_ABM_FLAGS, 0 },
1134   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1135     CPU_BMI_FLAGS, 0 },
1136   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1137     CPU_TBM_FLAGS, 0 },
1138   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1139     CPU_ADX_FLAGS, 0 },
1140   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1141     CPU_RDSEED_FLAGS, 0 },
1142   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1143     CPU_PRFCHW_FLAGS, 0 },
1144   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1145     CPU_SMAP_FLAGS, 0 },
1146   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1147     CPU_MPX_FLAGS, 0 },
1148   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1149     CPU_SHA_FLAGS, 0 },
1150   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1151     CPU_CLFLUSHOPT_FLAGS, 0 },
1152   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1153     CPU_PREFETCHWT1_FLAGS, 0 },
1154   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1155     CPU_SE1_FLAGS, 0 },
1156   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1157     CPU_CLWB_FLAGS, 0 },
1158   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1159     CPU_AVX512IFMA_FLAGS, 0 },
1160   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1161     CPU_AVX512VBMI_FLAGS, 0 },
1162   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1163     CPU_AVX512_4FMAPS_FLAGS, 0 },
1164   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1165     CPU_AVX512_4VNNIW_FLAGS, 0 },
1166   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1167     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1168   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1169     CPU_AVX512_VBMI2_FLAGS, 0 },
1170   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1171     CPU_AVX512_VNNI_FLAGS, 0 },
1172   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1173     CPU_AVX512_BITALG_FLAGS, 0 },
1174   { STRING_COMMA_LEN (".avx_vnni"), PROCESSOR_UNKNOWN,
1175     CPU_AVX_VNNI_FLAGS, 0 },
1176   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1177     CPU_CLZERO_FLAGS, 0 },
1178   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1179     CPU_MWAITX_FLAGS, 0 },
1180   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1181     CPU_OSPKE_FLAGS, 0 },
1182   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1183     CPU_RDPID_FLAGS, 0 },
1184   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1185     CPU_PTWRITE_FLAGS, 0 },
1186   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1187     CPU_IBT_FLAGS, 0 },
1188   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1189     CPU_SHSTK_FLAGS, 0 },
1190   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1191     CPU_GFNI_FLAGS, 0 },
1192   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1193     CPU_VAES_FLAGS, 0 },
1194   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1195     CPU_VPCLMULQDQ_FLAGS, 0 },
1196   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1197     CPU_WBNOINVD_FLAGS, 0 },
1198   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1199     CPU_PCONFIG_FLAGS, 0 },
1200   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1201     CPU_WAITPKG_FLAGS, 0 },
1202   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1203     CPU_CLDEMOTE_FLAGS, 0 },
1204   { STRING_COMMA_LEN (".amx_int8"), PROCESSOR_UNKNOWN,
1205     CPU_AMX_INT8_FLAGS, 0 },
1206   { STRING_COMMA_LEN (".amx_bf16"), PROCESSOR_UNKNOWN,
1207     CPU_AMX_BF16_FLAGS, 0 },
1208   { STRING_COMMA_LEN (".amx_tile"), PROCESSOR_UNKNOWN,
1209     CPU_AMX_TILE_FLAGS, 0 },
1210   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1211     CPU_MOVDIRI_FLAGS, 0 },
1212   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1213     CPU_MOVDIR64B_FLAGS, 0 },
1214   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1215     CPU_AVX512_BF16_FLAGS, 0 },
1216   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1217     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1218   { STRING_COMMA_LEN (".tdx"), PROCESSOR_UNKNOWN,
1219     CPU_TDX_FLAGS, 0 },
1220   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1221     CPU_ENQCMD_FLAGS, 0 },
1222   { STRING_COMMA_LEN (".serialize"), PROCESSOR_UNKNOWN,
1223     CPU_SERIALIZE_FLAGS, 0 },
1224   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1225     CPU_RDPRU_FLAGS, 0 },
1226   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1227     CPU_MCOMMIT_FLAGS, 0 },
1228   { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1229     CPU_SEV_ES_FLAGS, 0 },
1230   { STRING_COMMA_LEN (".tsxldtrk"), PROCESSOR_UNKNOWN,
1231     CPU_TSXLDTRK_FLAGS, 0 },
1232   { STRING_COMMA_LEN (".kl"), PROCESSOR_UNKNOWN,
1233     CPU_KL_FLAGS, 0 },
1234   { STRING_COMMA_LEN (".widekl"), PROCESSOR_UNKNOWN,
1235     CPU_WIDEKL_FLAGS, 0 },
1236   { STRING_COMMA_LEN (".uintr"), PROCESSOR_UNKNOWN,
1237     CPU_UINTR_FLAGS, 0 },
1238   { STRING_COMMA_LEN (".hreset"), PROCESSOR_UNKNOWN,
1239     CPU_HRESET_FLAGS, 0 },
1240 };
1241
1242 static const noarch_entry cpu_noarch[] =
1243 {
       /* Each entry pairs a "no"-prefixed name (via STRING_COMMA_LEN)
          with a CPU_ANY_*_FLAGS initializer.  Presumably these are the
          flags to switch off when the name is requested -- confirm
          against the code that walks this table.
          Note that "nosse4" uses CPU_ANY_SSE4_1_FLAGS, the same set as
          "nosse4.1", not CPU_ANY_SSE4_2_FLAGS.  */
1244   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1245   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1246   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1247   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1248   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1249   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1250   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1251   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1252   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1253   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1254   { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
1255   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1256   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1257   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1258   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1259   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1260   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1261   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1262   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1263   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1264   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1265   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1266   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1267   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1268   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1269   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1270   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1271   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1272   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1273   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1274   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1275   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1276   { STRING_COMMA_LEN ("noavx_vnni"), CPU_ANY_AVX_VNNI_FLAGS },
1277   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1278   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1279   { STRING_COMMA_LEN ("noamx_int8"), CPU_ANY_AMX_INT8_FLAGS },
1280   { STRING_COMMA_LEN ("noamx_bf16"), CPU_ANY_AMX_BF16_FLAGS },
1281   { STRING_COMMA_LEN ("noamx_tile"), CPU_ANY_AMX_TILE_FLAGS },
1282   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1283   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1284   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1285   { STRING_COMMA_LEN ("noavx512_vp2intersect"),
1286     CPU_ANY_AVX512_VP2INTERSECT_FLAGS },
1287   { STRING_COMMA_LEN ("notdx"), CPU_ANY_TDX_FLAGS },
1288   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1289   { STRING_COMMA_LEN ("noserialize"), CPU_ANY_SERIALIZE_FLAGS },
1290   { STRING_COMMA_LEN ("notsxldtrk"), CPU_ANY_TSXLDTRK_FLAGS },
1291   { STRING_COMMA_LEN ("nokl"), CPU_ANY_KL_FLAGS },
1292   { STRING_COMMA_LEN ("nowidekl"), CPU_ANY_WIDEKL_FLAGS },
1293   { STRING_COMMA_LEN ("nouintr"), CPU_ANY_UINTR_FLAGS },
1294   { STRING_COMMA_LEN ("nohreset"), CPU_ANY_HRESET_FLAGS },
1295 };
1296
1297 #ifdef I386COFF
1298 /* Like s_lcomm_internal in gas/read.c but the alignment string
1299    is allowed to be optional.  */
1300
1301 static symbolS *
1302 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1303 {
1304   addressT align = 0;
1305
1306   SKIP_WHITESPACE ();
1307
1308   if (needs_align
1309       && *input_line_pointer == ',')
1310     {
1311       align = parse_align (needs_align - 1);
1312
1313       if (align == (addressT) -1)
1314         return NULL;
1315     }
1316   else
1317     {
1318       if (size >= 8)
1319         align = 3;
1320       else if (size >= 4)
1321         align = 2;
1322       else if (size >= 2)
1323         align = 1;
1324       else
1325         align = 0;
1326     }
1327
1328   bss_alloc (symbolP, size, align);
1329   return symbolP;
1330 }
1331
     /* Handler installed for the "lcomm" entry of md_pseudo_table when
        I386COFF is defined.  Delegates to s_comm_internal, passing
        NEEDS_ALIGN doubled together with pe_lcomm_internal as the
        callback.
        NOTE(review): how s_comm_internal maps the doubled value back to
        the needs_align argument of the callback is not visible here --
        confirm against gas/read.c.  */
1332 static void
1333 pe_lcomm (int needs_align)
1334 {
1335   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1336 }
1337 #endif
1338
1339 const pseudo_typeS md_pseudo_table[] =
1340 {
       /* Each entry is { name, handler function, integer argument }.
          Presumably the integer is handed to the handler when the
          directive is seen -- confirm against the pseudo_typeS users.
          Several entries depend on the build configuration; see the
          preprocessor conditionals below.  The final all-zero entry is
          presumably the end-of-table marker.  */
       /* "align" takes a byte count unless OBJ_AOUT or USE_ALIGN_PTWO is
          defined, in which case the power-of-two handler is used.  */
1341 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1342   {"align", s_align_bytes, 0},
1343 #else
1344   {"align", s_align_ptwo, 0},
1345 #endif
1346   {"arch", set_cpu_arch, 0},
       /* Without I386COFF a "bss" directive is provided; with it,
          "lcomm" is routed to pe_lcomm instead.  */
1347 #ifndef I386COFF
1348   {"bss", s_bss, 0},
1349 #else
1350   {"lcomm", pe_lcomm, 1},
1351 #endif
1352   {"ffloat", float_cons, 'f'},
1353   {"dfloat", float_cons, 'd'},
1354   {"tfloat", float_cons, 'x'},
1355   {"value", cons, 2},
1356   {"slong", signed_cons, 4},
1357   {"noopt", s_ignore, 0},
1358   {"optim", s_ignore, 0},
1359   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1360   {"code16", set_code_flag, CODE_16BIT},
1361   {"code32", set_code_flag, CODE_32BIT},
       /* "code64" exists only when BFD64 is defined.  */
1362 #ifdef BFD64
1363   {"code64", set_code_flag, CODE_64BIT},
1364 #endif
       /* The following pairs share one handler; the argument selects
          which of the two variants is requested.  */
1365   {"intel_syntax", set_intel_syntax, 1},
1366   {"att_syntax", set_intel_syntax, 0},
1367   {"intel_mnemonic", set_intel_mnemonic, 1},
1368   {"att_mnemonic", set_intel_mnemonic, 0},
1369   {"allow_index_reg", set_allow_index_reg, 1},
1370   {"disallow_index_reg", set_allow_index_reg, 0},
1371   {"sse_check", set_check, 0},
1372   {"operand_check", set_check, 1},
       /* ELF builds get "largecomm"; all other builds get the DWARF
          "file"/"loc"/"loc_mark_labels" directives here.  */
1373 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1374   {"largecomm", handle_large_common, 0},
1375 #else
1376   {"file", dwarf2_directive_file, 0},
1377   {"loc", dwarf2_directive_loc, 0},
1378   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1379 #endif
1380 #ifdef TE_PE
1381   {"secrel32", pe_directive_secrel, 0},
1382 #endif
1383   {0, 0, 0}
1384 };
1385
1386 /* For interface with expression ().  pe_lcomm_internal also reads
        through this pointer to look for a ',' after skipping
        whitespace.  */
1387 extern char *input_line_pointer;
1388
1389 /* Hash table for instruction mnemonic lookup.  File-local; it is not
        filled in within this part of the file.  */
1390 static htab_t op_hash;
1391
1392 /* Hash table for register lookup.  File-local; it is not filled in
        within this part of the file.  */
1393 static htab_t reg_hash;
1394 \f
1395   /* Various efficient no-op patterns for aligning code labels.
1396      Note: Don't try to assemble the instructions in the comments.
1397      0L and 0w are not legal.
          The digit in each array name is the length of the pattern in
          bytes.  */
1398 static const unsigned char f32_1[] =
1399   {0x90};                               /* nop                  */
1400 static const unsigned char f32_2[] =
1401   {0x66,0x90};                          /* xchg %ax,%ax         */
1402 static const unsigned char f32_3[] =
1403   {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
1404 static const unsigned char f32_4[] =
1405   {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
1406 static const unsigned char f32_6[] =
1407   {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
1408 static const unsigned char f32_7[] =
1409   {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1410 static const unsigned char f16_3[] =
1411   {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
1412 static const unsigned char f16_4[] =
1413   {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
     /* Opcode bytes used by i386_generate_nops when it emits a jump over
        the padding; the displacement is written separately.  The
        0x66-prefixed form is the one used in 16-bit code.  */
1414 static const unsigned char jump_disp8[] =
1415   {0xeb};                               /* jmp disp8           */
1416 static const unsigned char jump32_disp32[] =
1417   {0xe9};                               /* jmp disp32          */
1418 static const unsigned char jump16_disp32[] =
1419   {0x66,0xe9};                          /* jmp disp32          */
1420 /* 32-bit NOP patterns.  Entry N - 1 is the N-byte pattern.  There is
        no 5-byte pattern, hence the NULL slot; i386_output_nops falls
        back to a shorter pattern when it meets it.  */
1421 static const unsigned char *const f32_patt[] = {
1422   f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1423 };
1424 /* 16-bit NOP patterns.  Entry N - 1 is the N-byte pattern; the one-
        and two-byte patterns are shared with the 32-bit table.  */
1425 static const unsigned char *const f16_patt[] = {
1426   f32_1, f32_2, f16_3, f16_4
1427 };
1428 /* nopl (%[re]ax) */
1429 static const unsigned char alt_3[] =
1430   {0x0f,0x1f,0x00};
1431 /* nopl 0(%[re]ax) */
1432 static const unsigned char alt_4[] =
1433   {0x0f,0x1f,0x40,0x00};
1434 /* nopl 0(%[re]ax,%[re]ax,1) */
1435 static const unsigned char alt_5[] =
1436   {0x0f,0x1f,0x44,0x00,0x00};
1437 /* nopw 0(%[re]ax,%[re]ax,1) */
1438 static const unsigned char alt_6[] =
1439   {0x66,0x0f,0x1f,0x44,0x00,0x00};
1440 /* nopl 0L(%[re]ax) */
1441 static const unsigned char alt_7[] =
1442   {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1443 /* nopl 0L(%[re]ax,%[re]ax,1) */
1444 static const unsigned char alt_8[] =
1445   {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1446 /* nopw 0L(%[re]ax,%[re]ax,1) */
1447 static const unsigned char alt_9[] =
1448   {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1449 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1450 static const unsigned char alt_10[] =
1451   {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1452 /* data16 nopw %cs:0L(%eax,%eax,1) */
1453 static const unsigned char alt_11[] =
1454   {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1455 /* 32-bit and 64-bit NOP patterns.  Entry N - 1 is the N-byte
        pattern; every length from 1 to 11 is present.  */
1456 static const unsigned char *const alt_patt[] = {
1457   f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1458   alt_9, alt_10, alt_11
1459 };
1460
/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.

   PATT[N - 1] must be the N-byte pattern, or NULL when no pattern of
   that length exists; in the NULL case the next shorter pattern is
   used instead.  The longest NOPs are written first and any remainder
   is filled afterwards.  A MAX_SINGLE_NOP_SIZE below 1 is reported
   through as_fatal.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)
{
  const unsigned char *pattern;
  int remainder;
  int pos = 0;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  /* Pick the largest pattern, stepping down one size when the
     requested one isn't available.  */
  pattern = patt[max_single_nop_size - 1];
  if (pattern == NULL)
    {
      max_single_nop_size -= 1;
      pattern = patt[max_single_nop_size - 1];
    }

  /* Split COUNT into a run of full-size NOPs and a shorter tail.  */
  remainder = count % max_single_nop_size;
  count -= remainder;

  while (pos < count)
    {
      memcpy (where + pos, pattern, max_single_nop_size);
      pos += max_single_nop_size;
    }

  if (remainder == 0)
    return;

  pattern = patt[remainder - 1];
  if (pattern == NULL)
    {
      /* No pattern of exactly this length: emit the next shorter one
	 followed by a one-byte NOP.  */
      remainder--;
      memcpy (where + pos, patt[remainder - 1], remainder);
      where[pos + remainder] = *patt[0];
    }
  else
    memcpy (where + pos, pattern, remainder);
}
1512
1513 static INLINE int
1514 fits_in_imm7 (offsetT num)
1515 {
1516   return (num & 0x7f) == num;
1517 }
1518
1519 static INLINE int
1520 fits_in_imm31 (offsetT num)
1521 {
1522   return (num & 0x7fffffff) == num;
1523 }
1524
1525 /* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
1526    single NOP instruction LIMIT.

        Only frags of type rs_fill_nop or rs_align_code, and
        rs_machine_dependent frags whose relax type is BRANCH_PADDING or
        FUSED_JCC_PADDING, are handled; for any other FRAGP the function
        returns without writing anything.

        A LIMIT of 0 selects the largest pattern of the chosen table.
        For rs_fill_nop frags a LIMIT above that size is reported with
        as_bad_where and nothing is written.  For frags that are neither
        rs_fill_nop nor rs_machine_dependent, COUNT is stored in
        FRAGP->fr_var.

        When COUNT divided by the largest single NOP size exceeds a
        per-table threshold, a jump over the padding is written first
        and only the bytes after the jump are filled with NOPs.  */
1527
1528 void
1529 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1530 {
1531   const unsigned char *const *patt = NULL;
1532   int max_single_nop_size;
1533   /* Maximum number of NOPs before switching to jump over NOPs.  */
1534   int max_number_of_nops;
1535
       /* Filter out frag types that never receive NOP padding.  */
1536   switch (fragP->fr_type)
1537     {
1538     case rs_fill_nop:
1539     case rs_align_code:
1540       break;
1541     case rs_machine_dependent:
1542       /* Allow NOP padding for jumps and calls.  */
1543       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1544           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1545         break;
1546       /* Fall through.  */
1547     default:
1548       return;
1549     }
1550
1551   /* We need to decide which NOP sequence to use for 32bit and
1552      64bit. When -mtune= is used:
1553
1554      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1555      PROCESSOR_GENERIC32, f32_patt will be used.
1556      2. For the rest, alt_patt will be used.
1557
1558      When -mtune= isn't used, alt_patt will be used if
1559      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1560      be used.
1561
1562      When -march= or .arch is used, we can't use anything beyond
1563      cpu_arch_isa_flags.

          NOTE(review): the cpu_arch_tune switch below also selects
          f32_patt for PROCESSOR_PENTIUMPRO and PROCESSOR_IAMCU, which
          the list in item 1 does not mention.  */
1564
1565   if (flag_code == CODE_16BIT)
1566     {
1567       patt = f16_patt;
1568       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1569       /* Limit number of NOPs to 2 in 16-bit mode.  */
1570       max_number_of_nops = 2;
1571     }
1572   else
1573     {
1574       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1575         {
1576           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1577           switch (cpu_arch_tune)
1578             {
1579             case PROCESSOR_UNKNOWN:
1580               /* We use cpu_arch_isa_flags to check if we SHOULD
1581                  optimize with nops.  */
1582               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1583                 patt = alt_patt;
1584               else
1585                 patt = f32_patt;
1586               break;
1587             case PROCESSOR_PENTIUM4:
1588             case PROCESSOR_NOCONA:
1589             case PROCESSOR_CORE:
1590             case PROCESSOR_CORE2:
1591             case PROCESSOR_COREI7:
1592             case PROCESSOR_L1OM:
1593             case PROCESSOR_K1OM:
1594             case PROCESSOR_GENERIC64:
1595             case PROCESSOR_K6:
1596             case PROCESSOR_ATHLON:
1597             case PROCESSOR_K8:
1598             case PROCESSOR_AMDFAM10:
1599             case PROCESSOR_BD:
1600             case PROCESSOR_ZNVER:
1601             case PROCESSOR_BT:
1602               patt = alt_patt;
1603               break;
1604             case PROCESSOR_I386:
1605             case PROCESSOR_I486:
1606             case PROCESSOR_PENTIUM:
1607             case PROCESSOR_PENTIUMPRO:
1608             case PROCESSOR_IAMCU:
1609             case PROCESSOR_GENERIC32:
1610               patt = f32_patt;
1611               break;
1612             }
1613         }
1614       else
1615         {
               /* The frag recorded a specific ISA: choose by the tune
                  value recorded in the frag, and use alt_patt only when
                  the frag's ISA flags include cpunop (PROCESSOR_GENERIC64
                  always uses alt_patt).  */
1616           switch (fragP->tc_frag_data.tune)
1617             {
1618             case PROCESSOR_UNKNOWN:
1619               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1620                  PROCESSOR_UNKNOWN.  */
1621               abort ();
1622               break;
1623
1624             case PROCESSOR_I386:
1625             case PROCESSOR_I486:
1626             case PROCESSOR_PENTIUM:
1627             case PROCESSOR_IAMCU:
1628             case PROCESSOR_K6:
1629             case PROCESSOR_ATHLON:
1630             case PROCESSOR_K8:
1631             case PROCESSOR_AMDFAM10:
1632             case PROCESSOR_BD:
1633             case PROCESSOR_ZNVER:
1634             case PROCESSOR_BT:
1635             case PROCESSOR_GENERIC32:
1636               /* We use cpu_arch_isa_flags to check if we CAN optimize
1637                  with nops.  */
1638               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1639                 patt = alt_patt;
1640               else
1641                 patt = f32_patt;
1642               break;
1643             case PROCESSOR_PENTIUMPRO:
1644             case PROCESSOR_PENTIUM4:
1645             case PROCESSOR_NOCONA:
1646             case PROCESSOR_CORE:
1647             case PROCESSOR_CORE2:
1648             case PROCESSOR_COREI7:
1649             case PROCESSOR_L1OM:
1650             case PROCESSOR_K1OM:
1651               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1652                 patt = alt_patt;
1653               else
1654                 patt = f32_patt;
1655               break;
1656             case PROCESSOR_GENERIC64:
1657               patt = alt_patt;
1658               break;
1659             }
1660         }
1661
1662       if (patt == f32_patt)
1663         {
1664           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1665           /* Limit number of NOPs to 2 for older processors.  */
1666           max_number_of_nops = 2;
1667         }
1668       else
1669         {
1670           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1671           /* Limit number of NOPs to 7 for newer processors.  */
1672           max_number_of_nops = 7;
1673         }
1674     }
1675
       /* A zero LIMIT means "no caller-imposed limit".  */
1676   if (limit == 0)
1677     limit = max_single_nop_size;
1678
1679   if (fragP->fr_type == rs_fill_nop)
1680     {
1681       /* Output NOPs for .nop directive.  */
1682       if (limit > max_single_nop_size)
1683         {
1684           as_bad_where (fragP->fr_file, fragP->fr_line,
1685                         _("invalid single nop size: %d "
1686                           "(expect within [0, %d])"),
1687                         limit, max_single_nop_size);
1688           return;
1689         }
1690     }
1691   else if (fragP->fr_type != rs_machine_dependent)
1692     fragP->fr_var = count;
1693
1694   if ((count / max_single_nop_size) > max_number_of_nops)
1695     {
1696       /* Generate jump over NOPs.  */
           /* A "jmp disp8" occupies 2 bytes, so it skips COUNT - 2.  */
1697       offsetT disp = count - 2;
1698       if (fits_in_imm7 (disp))
1699         {
1700           /* Use "jmp disp8" if possible.  */
1701           count = disp;
1702           where[0] = jump_disp8[0];
1703           where[1] = count;
1704           where += 2;
1705         }
1706       else
1707         {
1708           unsigned int size_of_jump;
1709
1710           if (flag_code == CODE_16BIT)
1711             {
1712               where[0] = jump16_disp32[0];
1713               where[1] = jump16_disp32[1];
1714               size_of_jump = 2;
1715             }
1716           else
1717             {
1718               where[0] = jump32_disp32[0];
1719               size_of_jump = 1;
1720             }
1721
               /* The jump takes SIZE_OF_JUMP opcode bytes plus a 4-byte
                  displacement; what is left is both the displacement
                  value and the number of NOP bytes still to write.  */
1722           count -= size_of_jump + 4;
1723           if (!fits_in_imm31 (count))
1724             {
1725               as_bad_where (fragP->fr_file, fragP->fr_line,
1726                             _("jump over nop padding out of range"));
1727               return;
1728             }
1729
1730           md_number_to_chars (where + size_of_jump, count, 4);
1731           where += size_of_jump + 4;
1732         }
1733     }
1734
1735   /* Generate multiple NOPs.  */
1736   i386_output_nops (where, patt, count, limit);
1737 }
1738
1739 static INLINE int
1740 operand_type_all_zero (const union i386_operand_type *x)
1741 {
1742   switch (ARRAY_SIZE(x->array))
1743     {
1744     case 3:
1745       if (x->array[2])
1746         return 0;
1747       /* Fall through.  */
1748     case 2:
1749       if (x->array[1])
1750         return 0;
1751       /* Fall through.  */
1752     case 1:
1753       return !x->array[0];
1754     default:
1755       abort ();
1756     }
1757 }
1758
1759 static INLINE void
1760 operand_type_set (union i386_operand_type *x, unsigned int v)
1761 {
1762   switch (ARRAY_SIZE(x->array))
1763     {
1764     case 3:
1765       x->array[2] = v;
1766       /* Fall through.  */
1767     case 2:
1768       x->array[1] = v;
1769       /* Fall through.  */
1770     case 1:
1771       x->array[0] = v;
1772       /* Fall through.  */
1773       break;
1774     default:
1775       abort ();
1776     }
1777
1778   x->bitfield.class = ClassNone;
1779   x->bitfield.instance = InstanceNone;
1780 }
1781
1782 static INLINE int
1783 operand_type_equal (const union i386_operand_type *x,
1784                     const union i386_operand_type *y)
1785 {
1786   switch (ARRAY_SIZE(x->array))
1787     {
1788     case 3:
1789       if (x->array[2] != y->array[2])
1790         return 0;
1791       /* Fall through.  */
1792     case 2:
1793       if (x->array[1] != y->array[1])
1794         return 0;
1795       /* Fall through.  */
1796     case 1:
1797       return x->array[0] == y->array[0];
1798       break;
1799     default:
1800       abort ();
1801     }
1802 }
1803
1804 static INLINE int
1805 cpu_flags_all_zero (const union i386_cpu_flags *x)
1806 {
1807   switch (ARRAY_SIZE(x->array))
1808     {
1809     case 4:
1810       if (x->array[3])
1811         return 0;
1812       /* Fall through.  */
1813     case 3:
1814       if (x->array[2])
1815         return 0;
1816       /* Fall through.  */
1817     case 2:
1818       if (x->array[1])
1819         return 0;
1820       /* Fall through.  */
1821     case 1:
1822       return !x->array[0];
1823     default:
1824       abort ();
1825     }
1826 }
1827
1828 static INLINE int
1829 cpu_flags_equal (const union i386_cpu_flags *x,
1830                  const union i386_cpu_flags *y)
1831 {
1832   switch (ARRAY_SIZE(x->array))
1833     {
1834     case 4:
1835       if (x->array[3] != y->array[3])
1836         return 0;
1837       /* Fall through.  */
1838     case 3:
1839       if (x->array[2] != y->array[2])
1840         return 0;
1841       /* Fall through.  */
1842     case 2:
1843       if (x->array[1] != y->array[1])
1844         return 0;
1845       /* Fall through.  */
1846     case 1:
1847       return x->array[0] == y->array[0];
1848       break;
1849     default:
1850       abort ();
1851     }
1852 }
1853
1854 static INLINE int
1855 cpu_flags_check_cpu64 (i386_cpu_flags f)
1856 {
1857   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1858            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1859 }
1860
1861 static INLINE i386_cpu_flags
1862 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1863 {
1864   switch (ARRAY_SIZE (x.array))
1865     {
1866     case 4:
1867       x.array [3] &= y.array [3];
1868       /* Fall through.  */
1869     case 3:
1870       x.array [2] &= y.array [2];
1871       /* Fall through.  */
1872     case 2:
1873       x.array [1] &= y.array [1];
1874       /* Fall through.  */
1875     case 1:
1876       x.array [0] &= y.array [0];
1877       break;
1878     default:
1879       abort ();
1880     }
1881   return x;
1882 }
1883
1884 static INLINE i386_cpu_flags
1885 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1886 {
1887   switch (ARRAY_SIZE (x.array))
1888     {
1889     case 4:
1890       x.array [3] |= y.array [3];
1891       /* Fall through.  */
1892     case 3:
1893       x.array [2] |= y.array [2];
1894       /* Fall through.  */
1895     case 2:
1896       x.array [1] |= y.array [1];
1897       /* Fall through.  */
1898     case 1:
1899       x.array [0] |= y.array [0];
1900       break;
1901     default:
1902       abort ();
1903     }
1904   return x;
1905 }
1906
1907 static INLINE i386_cpu_flags
1908 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1909 {
1910   switch (ARRAY_SIZE (x.array))
1911     {
1912     case 4:
1913       x.array [3] &= ~y.array [3];
1914       /* Fall through.  */
1915     case 3:
1916       x.array [2] &= ~y.array [2];
1917       /* Fall through.  */
1918     case 2:
1919       x.array [1] &= ~y.array [1];
1920       /* Fall through.  */
1921     case 1:
1922       x.array [0] &= ~y.array [0];
1923       break;
1924     default:
1925       abort ();
1926     }
1927   return x;
1928 }
1929
/* CPU flag set initialised from CPU_ANY_AVX512F_FLAGS.  */
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

/* Bits of the value returned by cpu_flags_match.
   CPU_FLAGS_ARCH_MATCH is set when the template's CPU requirements
   are met by cpu_arch_flags; CPU_FLAGS_64BIT_MATCH is set when
   cpu_flags_check_cpu64 accepts the template for the current code
   mode.  */
#define CPU_FLAGS_ARCH_MATCH            0x1
#define CPU_FLAGS_64BIT_MATCH           0x2

/* Both match bits set at once.  */
#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1937
1938 /* Return CPU flags match bits. */
1939
1940 static int
1941 cpu_flags_match (const insn_template *t)
1942 {
1943   i386_cpu_flags x = t->cpu_flags;
1944   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1945
1946   x.bitfield.cpu64 = 0;
1947   x.bitfield.cpuno64 = 0;
1948
1949   if (cpu_flags_all_zero (&x))
1950     {
1951       /* This instruction is available on all archs.  */
1952       match |= CPU_FLAGS_ARCH_MATCH;
1953     }
1954   else
1955     {
1956       /* This instruction is available only on some archs.  */
1957       i386_cpu_flags cpu = cpu_arch_flags;
1958
1959       /* AVX512VL is no standalone feature - match it and then strip it.  */
1960       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1961         return match;
1962       x.bitfield.cpuavx512vl = 0;
1963
1964       cpu = cpu_flags_and (x, cpu);
1965       if (!cpu_flags_all_zero (&cpu))
1966         {
1967           if (x.bitfield.cpuavx)
1968             {
1969               /* We need to check a few extra flags with AVX.  */
1970               if (cpu.bitfield.cpuavx
1971                   && (!t->opcode_modifier.sse2avx
1972                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1973                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1974                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1975                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1976                 match |= CPU_FLAGS_ARCH_MATCH;
1977             }
1978           else if (x.bitfield.cpuavx512f)
1979             {
1980               /* We need to check a few extra flags with AVX512F.  */
1981               if (cpu.bitfield.cpuavx512f
1982                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1983                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1984                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1985                 match |= CPU_FLAGS_ARCH_MATCH;
1986             }
1987           else
1988             match |= CPU_FLAGS_ARCH_MATCH;
1989         }
1990     }
1991   return match;
1992 }
1993
1994 static INLINE i386_operand_type
1995 operand_type_and (i386_operand_type x, i386_operand_type y)
1996 {
1997   if (x.bitfield.class != y.bitfield.class)
1998     x.bitfield.class = ClassNone;
1999   if (x.bitfield.instance != y.bitfield.instance)
2000     x.bitfield.instance = InstanceNone;
2001
2002   switch (ARRAY_SIZE (x.array))
2003     {
2004     case 3:
2005       x.array [2] &= y.array [2];
2006       /* Fall through.  */
2007     case 2:
2008       x.array [1] &= y.array [1];
2009       /* Fall through.  */
2010     case 1:
2011       x.array [0] &= y.array [0];
2012       break;
2013     default:
2014       abort ();
2015     }
2016   return x;
2017 }
2018
2019 static INLINE i386_operand_type
2020 operand_type_and_not (i386_operand_type x, i386_operand_type y)
2021 {
2022   gas_assert (y.bitfield.class == ClassNone);
2023   gas_assert (y.bitfield.instance == InstanceNone);
2024
2025   switch (ARRAY_SIZE (x.array))
2026     {
2027     case 3:
2028       x.array [2] &= ~y.array [2];
2029       /* Fall through.  */
2030     case 2:
2031       x.array [1] &= ~y.array [1];
2032       /* Fall through.  */
2033     case 1:
2034       x.array [0] &= ~y.array [0];
2035       break;
2036     default:
2037       abort ();
2038     }
2039   return x;
2040 }
2041
2042 static INLINE i386_operand_type
2043 operand_type_or (i386_operand_type x, i386_operand_type y)
2044 {
2045   gas_assert (x.bitfield.class == ClassNone ||
2046               y.bitfield.class == ClassNone ||
2047               x.bitfield.class == y.bitfield.class);
2048   gas_assert (x.bitfield.instance == InstanceNone ||
2049               y.bitfield.instance == InstanceNone ||
2050               x.bitfield.instance == y.bitfield.instance);
2051
2052   switch (ARRAY_SIZE (x.array))
2053     {
2054     case 3:
2055       x.array [2] |= y.array [2];
2056       /* Fall through.  */
2057     case 2:
2058       x.array [1] |= y.array [1];
2059       /* Fall through.  */
2060     case 1:
2061       x.array [0] |= y.array [0];
2062       break;
2063     default:
2064       abort ();
2065     }
2066   return x;
2067 }
2068
2069 static INLINE i386_operand_type
2070 operand_type_xor (i386_operand_type x, i386_operand_type y)
2071 {
2072   gas_assert (y.bitfield.class == ClassNone);
2073   gas_assert (y.bitfield.instance == InstanceNone);
2074
2075   switch (ARRAY_SIZE (x.array))
2076     {
2077     case 3:
2078       x.array [2] ^= y.array [2];
2079       /* Fall through.  */
2080     case 2:
2081       x.array [1] ^= y.array [1];
2082       /* Fall through.  */
2083     case 1:
2084       x.array [0] ^= y.array [0];
2085       break;
2086     default:
2087       abort ();
2088     }
2089   return x;
2090 }
2091
/* Constant operand-type values.  Each one is initialised from the
   OPERAND_TYPE_* macro of the corresponding name.  */
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2109
/* Operand categories that operand_type_check can test for.  */
enum operand_type
{
  reg,          /* Class field is Reg.  */
  imm,          /* Any of the imm8/imm8s/imm16/imm32/imm32s/imm64 bits.  */
  disp,         /* Any of the disp8/disp16/disp32/disp32s/disp64 bits.  */
  anymem        /* Any displacement bit, or baseindex.  */
};
2117
2118 static INLINE int
2119 operand_type_check (i386_operand_type t, enum operand_type c)
2120 {
2121   switch (c)
2122     {
2123     case reg:
2124       return t.bitfield.class == Reg;
2125
2126     case imm:
2127       return (t.bitfield.imm8
2128               || t.bitfield.imm8s
2129               || t.bitfield.imm16
2130               || t.bitfield.imm32
2131               || t.bitfield.imm32s
2132               || t.bitfield.imm64);
2133
2134     case disp:
2135       return (t.bitfield.disp8
2136               || t.bitfield.disp16
2137               || t.bitfield.disp32
2138               || t.bitfield.disp32s
2139               || t.bitfield.disp64);
2140
2141     case anymem:
2142       return (t.bitfield.disp8
2143               || t.bitfield.disp16
2144               || t.bitfield.disp32
2145               || t.bitfield.disp32s
2146               || t.bitfield.disp64
2147               || t.bitfield.baseindex);
2148
2149     default:
2150       abort ();
2151     }
2152
2153   return 0;
2154 }
2155
2156 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2157    between operand GIVEN and opeand WANTED for instruction template T.  */
2158
2159 static INLINE int
2160 match_operand_size (const insn_template *t, unsigned int wanted,
2161                     unsigned int given)
2162 {
2163   return !((i.types[given].bitfield.byte
2164             && !t->operand_types[wanted].bitfield.byte)
2165            || (i.types[given].bitfield.word
2166                && !t->operand_types[wanted].bitfield.word)
2167            || (i.types[given].bitfield.dword
2168                && !t->operand_types[wanted].bitfield.dword)
2169            || (i.types[given].bitfield.qword
2170                && !t->operand_types[wanted].bitfield.qword)
2171            || (i.types[given].bitfield.tbyte
2172                && !t->operand_types[wanted].bitfield.tbyte));
2173 }
2174
2175 /* Return 1 if there is no conflict in SIMD register between operand
2176    GIVEN and opeand WANTED for instruction template T.  */
2177
2178 static INLINE int
2179 match_simd_size (const insn_template *t, unsigned int wanted,
2180                  unsigned int given)
2181 {
2182   return !((i.types[given].bitfield.xmmword
2183             && !t->operand_types[wanted].bitfield.xmmword)
2184            || (i.types[given].bitfield.ymmword
2185                && !t->operand_types[wanted].bitfield.ymmword)
2186            || (i.types[given].bitfield.zmmword
2187                && !t->operand_types[wanted].bitfield.zmmword)
2188            || (i.types[given].bitfield.tmmword
2189                && !t->operand_types[wanted].bitfield.tmmword));
2190 }
2191
2192 /* Return 1 if there is no conflict in any size between operand GIVEN
2193    and opeand WANTED for instruction template T.  */
2194
2195 static INLINE int
2196 match_mem_size (const insn_template *t, unsigned int wanted,
2197                 unsigned int given)
2198 {
2199   return (match_operand_size (t, wanted, given)
2200           && !((i.types[given].bitfield.unspecified
2201                 && !i.broadcast.type
2202                 && !t->operand_types[wanted].bitfield.unspecified)
2203                || (i.types[given].bitfield.fword
2204                    && !t->operand_types[wanted].bitfield.fword)
2205                /* For scalar opcode templates to allow register and memory
2206                   operands at the same time, some special casing is needed
2207                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2208                   down-conversion vpmov*.  */
2209                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2210                     && t->operand_types[wanted].bitfield.byte
2211                        + t->operand_types[wanted].bitfield.word
2212                        + t->operand_types[wanted].bitfield.dword
2213                        + t->operand_types[wanted].bitfield.qword
2214                        > !!t->opcode_modifier.broadcast)
2215                    ? (i.types[given].bitfield.xmmword
2216                       || i.types[given].bitfield.ymmword
2217                       || i.types[given].bitfield.zmmword)
2218                    : !match_simd_size(t, wanted, given))));
2219 }
2220
2221 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2222    operands for instruction template T, and it has MATCH_REVERSE set if there
2223    is no size conflict on any operands for the template with operands reversed
2224    (and the template allows for reversing in the first place).  */
2225
2226 #define MATCH_STRAIGHT 1
2227 #define MATCH_REVERSE  2
2228
2229 static INLINE unsigned int
2230 operand_size_match (const insn_template *t)
2231 {
2232   unsigned int j, match = MATCH_STRAIGHT;
2233
2234   /* Don't check non-absolute jump instructions.  */
2235   if (t->opcode_modifier.jump
2236       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2237     return match;
2238
2239   /* Check memory and accumulator operand size.  */
2240   for (j = 0; j < i.operands; j++)
2241     {
2242       if (i.types[j].bitfield.class != Reg
2243           && i.types[j].bitfield.class != RegSIMD
2244           && t->opcode_modifier.anysize)
2245         continue;
2246
2247       if (t->operand_types[j].bitfield.class == Reg
2248           && !match_operand_size (t, j, j))
2249         {
2250           match = 0;
2251           break;
2252         }
2253
2254       if (t->operand_types[j].bitfield.class == RegSIMD
2255           && !match_simd_size (t, j, j))
2256         {
2257           match = 0;
2258           break;
2259         }
2260
2261       if (t->operand_types[j].bitfield.instance == Accum
2262           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2263         {
2264           match = 0;
2265           break;
2266         }
2267
2268       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2269         {
2270           match = 0;
2271           break;
2272         }
2273     }
2274
2275   if (!t->opcode_modifier.d)
2276     {
2277     mismatch:
2278       if (!match)
2279         i.error = operand_size_mismatch;
2280       return match;
2281     }
2282
2283   /* Check reverse.  */
2284   gas_assert (i.operands >= 2 && i.operands <= 3);
2285
2286   for (j = 0; j < i.operands; j++)
2287     {
2288       unsigned int given = i.operands - j - 1;
2289
2290       if (t->operand_types[j].bitfield.class == Reg
2291           && !match_operand_size (t, j, given))
2292         goto mismatch;
2293
2294       if (t->operand_types[j].bitfield.class == RegSIMD
2295           && !match_simd_size (t, j, given))
2296         goto mismatch;
2297
2298       if (t->operand_types[j].bitfield.instance == Accum
2299           && (!match_operand_size (t, j, given)
2300               || !match_simd_size (t, j, given)))
2301         goto mismatch;
2302
2303       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2304         goto mismatch;
2305     }
2306
2307   return match | MATCH_REVERSE;
2308 }
2309
2310 static INLINE int
2311 operand_type_match (i386_operand_type overlap,
2312                     i386_operand_type given)
2313 {
2314   i386_operand_type temp = overlap;
2315
2316   temp.bitfield.unspecified = 0;
2317   temp.bitfield.byte = 0;
2318   temp.bitfield.word = 0;
2319   temp.bitfield.dword = 0;
2320   temp.bitfield.fword = 0;
2321   temp.bitfield.qword = 0;
2322   temp.bitfield.tbyte = 0;
2323   temp.bitfield.xmmword = 0;
2324   temp.bitfield.ymmword = 0;
2325   temp.bitfield.zmmword = 0;
2326   temp.bitfield.tmmword = 0;
2327   if (operand_type_all_zero (&temp))
2328     goto mismatch;
2329
2330   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2331     return 1;
2332
2333  mismatch:
2334   i.error = operand_type_mismatch;
2335   return 0;
2336 }
2337
2338 /* If given types g0 and g1 are registers they must be of the same type
2339    unless the expected operand type register overlap is null.
2340    Some Intel syntax memory operand size checking also happens here.  */
2341
2342 static INLINE int
2343 operand_type_register_match (i386_operand_type g0,
2344                              i386_operand_type t0,
2345                              i386_operand_type g1,
2346                              i386_operand_type t1)
2347 {
2348   if (g0.bitfield.class != Reg
2349       && g0.bitfield.class != RegSIMD
2350       && (!operand_type_check (g0, anymem)
2351           || g0.bitfield.unspecified
2352           || (t0.bitfield.class != Reg
2353               && t0.bitfield.class != RegSIMD)))
2354     return 1;
2355
2356   if (g1.bitfield.class != Reg
2357       && g1.bitfield.class != RegSIMD
2358       && (!operand_type_check (g1, anymem)
2359           || g1.bitfield.unspecified
2360           || (t1.bitfield.class != Reg
2361               && t1.bitfield.class != RegSIMD)))
2362     return 1;
2363
2364   if (g0.bitfield.byte == g1.bitfield.byte
2365       && g0.bitfield.word == g1.bitfield.word
2366       && g0.bitfield.dword == g1.bitfield.dword
2367       && g0.bitfield.qword == g1.bitfield.qword
2368       && g0.bitfield.xmmword == g1.bitfield.xmmword
2369       && g0.bitfield.ymmword == g1.bitfield.ymmword
2370       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2371     return 1;
2372
2373   if (!(t0.bitfield.byte & t1.bitfield.byte)
2374       && !(t0.bitfield.word & t1.bitfield.word)
2375       && !(t0.bitfield.dword & t1.bitfield.dword)
2376       && !(t0.bitfield.qword & t1.bitfield.qword)
2377       && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2378       && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2379       && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2380     return 1;
2381
2382   i.error = register_type_mismatch;
2383
2384   return 0;
2385 }
2386
2387 static INLINE unsigned int
2388 register_number (const reg_entry *r)
2389 {
2390   unsigned int nr = r->reg_num;
2391
2392   if (r->reg_flags & RegRex)
2393     nr += 8;
2394
2395   if (r->reg_flags & RegVRex)
2396     nr += 16;
2397
2398   return nr;
2399 }
2400
2401 static INLINE unsigned int
2402 mode_from_disp_size (i386_operand_type t)
2403 {
2404   if (t.bitfield.disp8)
2405     return 1;
2406   else if (t.bitfield.disp16
2407            || t.bitfield.disp32
2408            || t.bitfield.disp32s)
2409     return 2;
2410   else
2411     return 0;
2412 }
2413
2414 static INLINE int
2415 fits_in_signed_byte (addressT num)
2416 {
2417   return num + 0x80 <= 0xff;
2418 }
2419
2420 static INLINE int
2421 fits_in_unsigned_byte (addressT num)
2422 {
2423   return num <= 0xff;
2424 }
2425
2426 static INLINE int
2427 fits_in_unsigned_word (addressT num)
2428 {
2429   return num <= 0xffff;
2430 }
2431
2432 static INLINE int
2433 fits_in_signed_word (addressT num)
2434 {
2435   return num + 0x8000 <= 0xffff;
2436 }
2437
2438 static INLINE int
2439 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2440 {
2441 #ifndef BFD64
2442   return 1;
2443 #else
2444   return num + 0x80000000 <= 0xffffffff;
2445 #endif
2446 }                               /* fits_in_signed_long() */
2447
2448 static INLINE int
2449 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2450 {
2451 #ifndef BFD64
2452   return 1;
2453 #else
2454   return num <= 0xffffffff;
2455 #endif
2456 }                               /* fits_in_unsigned_long() */
2457
2458 static INLINE valueT extend_to_32bit_address (addressT num)
2459 {
2460 #ifdef BFD64
2461   if (fits_in_unsigned_long(num))
2462     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2463
2464   if (!fits_in_signed_long (num))
2465     return num & 0xffffffff;
2466 #endif
2467
2468   return num;
2469 }
2470
2471 static INLINE int
2472 fits_in_disp8 (offsetT num)
2473 {
2474   int shift = i.memshift;
2475   unsigned int mask;
2476
2477   if (shift == -1)
2478     abort ();
2479
2480   mask = (1 << shift) - 1;
2481
2482   /* Return 0 if NUM isn't properly aligned.  */
2483   if ((num & mask))
2484     return 0;
2485
2486   /* Check if NUM will fit in 8bit after shift.  */
2487   return fits_in_signed_byte (num >> shift);
2488 }
2489
2490 static INLINE int
2491 fits_in_imm4 (offsetT num)
2492 {
2493   return (num & 0xf) == num;
2494 }
2495
2496 static i386_operand_type
2497 smallest_imm_type (offsetT num)
2498 {
2499   i386_operand_type t;
2500
2501   operand_type_set (&t, 0);
2502   t.bitfield.imm64 = 1;
2503
2504   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2505     {
2506       /* This code is disabled on the 486 because all the Imm1 forms
2507          in the opcode table are slower on the i486.  They're the
2508          versions with the implicitly specified single-position
2509          displacement, which has another syntax if you really want to
2510          use that form.  */
2511       t.bitfield.imm1 = 1;
2512       t.bitfield.imm8 = 1;
2513       t.bitfield.imm8s = 1;
2514       t.bitfield.imm16 = 1;
2515       t.bitfield.imm32 = 1;
2516       t.bitfield.imm32s = 1;
2517     }
2518   else if (fits_in_signed_byte (num))
2519     {
2520       t.bitfield.imm8 = 1;
2521       t.bitfield.imm8s = 1;
2522       t.bitfield.imm16 = 1;
2523       t.bitfield.imm32 = 1;
2524       t.bitfield.imm32s = 1;
2525     }
2526   else if (fits_in_unsigned_byte (num))
2527     {
2528       t.bitfield.imm8 = 1;
2529       t.bitfield.imm16 = 1;
2530       t.bitfield.imm32 = 1;
2531       t.bitfield.imm32s = 1;
2532     }
2533   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2534     {
2535       t.bitfield.imm16 = 1;
2536       t.bitfield.imm32 = 1;
2537       t.bitfield.imm32s = 1;
2538     }
2539   else if (fits_in_signed_long (num))
2540     {
2541       t.bitfield.imm32 = 1;
2542       t.bitfield.imm32s = 1;
2543     }
2544   else if (fits_in_unsigned_long (num))
2545     t.bitfield.imm32 = 1;
2546
2547   return t;
2548 }
2549
2550 static offsetT
2551 offset_in_range (offsetT val, int size)
2552 {
2553   addressT mask;
2554
2555   switch (size)
2556     {
2557     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2558     case 2: mask = ((addressT) 1 << 16) - 1; break;
2559     case 4: mask = ((addressT) 2 << 31) - 1; break;
2560 #ifdef BFD64
2561     case 8: mask = ((addressT) 2 << 63) - 1; break;
2562 #endif
2563     default: abort ();
2564     }
2565
2566   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2567     {
2568       char buf1[40], buf2[40];
2569
2570       bfd_sprintf_vma (stdoutput, buf1, val);
2571       bfd_sprintf_vma (stdoutput, buf2, val & mask);
2572       as_warn (_("%s shortened to %s"), buf1, buf2);
2573     }
2574   return val & mask;
2575 }
2576
/* Result codes of add_prefix.  PREFIX_EXIST must stay 0: add_prefix
   tests its result for being non-zero to tell success from a
   duplicate.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,     /* A prefix of the same class already exists.  */
  PREFIX_LOCK,          /* A lock prefix was added.  */
  PREFIX_REP,           /* A rep/repne prefix was added.  */
  PREFIX_DS,            /* A ds prefix was added.  */
  PREFIX_OTHER          /* Some other prefix was added.  */
};
2585
2586 /* Returns
2587    a. PREFIX_EXIST if attempting to add a prefix where one from the
2588    same class already exists.
2589    b. PREFIX_LOCK if lock prefix is added.
2590    c. PREFIX_REP if rep/repne prefix is added.
2591    d. PREFIX_DS if ds prefix is added.
2592    e. PREFIX_OTHER if other prefix is added.
2593  */
2594
2595 static enum PREFIX_GROUP
2596 add_prefix (unsigned int prefix)
2597 {
2598   enum PREFIX_GROUP ret = PREFIX_OTHER;
2599   unsigned int q;
2600
2601   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2602       && flag_code == CODE_64BIT)
2603     {
2604       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2605           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2606           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2607           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2608         ret = PREFIX_EXIST;
2609       q = REX_PREFIX;
2610     }
2611   else
2612     {
2613       switch (prefix)
2614         {
2615         default:
2616           abort ();
2617
2618         case DS_PREFIX_OPCODE:
2619           ret = PREFIX_DS;
2620           /* Fall through.  */
2621         case CS_PREFIX_OPCODE:
2622         case ES_PREFIX_OPCODE:
2623         case FS_PREFIX_OPCODE:
2624         case GS_PREFIX_OPCODE:
2625         case SS_PREFIX_OPCODE:
2626           q = SEG_PREFIX;
2627           break;
2628
2629         case REPNE_PREFIX_OPCODE:
2630         case REPE_PREFIX_OPCODE:
2631           q = REP_PREFIX;
2632           ret = PREFIX_REP;
2633           break;
2634
2635         case LOCK_PREFIX_OPCODE:
2636           q = LOCK_PREFIX;
2637           ret = PREFIX_LOCK;
2638           break;
2639
2640         case FWAIT_OPCODE:
2641           q = WAIT_PREFIX;
2642           break;
2643
2644         case ADDR_PREFIX_OPCODE:
2645           q = ADDR_PREFIX;
2646           break;
2647
2648         case DATA_PREFIX_OPCODE:
2649           q = DATA_PREFIX;
2650           break;
2651         }
2652       if (i.prefix[q] != 0)
2653         ret = PREFIX_EXIST;
2654     }
2655
2656   if (ret)
2657     {
2658       if (!i.prefix[q])
2659         ++i.prefixes;
2660       i.prefix[q] |= prefix;
2661     }
2662   else
2663     as_bad (_("same type of prefix used twice"));
2664
2665   return ret;
2666 }
2667
2668 static void
2669 update_code_flag (int value, int check)
2670 {
2671   PRINTF_LIKE ((*as_error));
2672
2673   flag_code = (enum flag_code) value;
2674   if (flag_code == CODE_64BIT)
2675     {
2676       cpu_arch_flags.bitfield.cpu64 = 1;
2677       cpu_arch_flags.bitfield.cpuno64 = 0;
2678     }
2679   else
2680     {
2681       cpu_arch_flags.bitfield.cpu64 = 0;
2682       cpu_arch_flags.bitfield.cpuno64 = 1;
2683     }
2684   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2685     {
2686       if (check)
2687         as_error = as_fatal;
2688       else
2689         as_error = as_bad;
2690       (*as_error) (_("64bit mode not supported on `%s'."),
2691                    cpu_arch_name ? cpu_arch_name : default_arch);
2692     }
2693   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2694     {
2695       if (check)
2696         as_error = as_fatal;
2697       else
2698         as_error = as_bad;
2699       (*as_error) (_("32bit mode not supported on `%s'."),
2700                    cpu_arch_name ? cpu_arch_name : default_arch);
2701     }
2702   stackop_size = '\0';
2703 }
2704
/* Switch to code mode VALUE via update_code_flag with CHECK == 0, so
   an unsupported mode is reported with as_bad rather than
   as_fatal.  */

static void
set_code_flag (int value)
{
  const int check = 0;

  update_code_flag (value, check);
}
2710
2711 static void
2712 set_16bit_gcc_code_flag (int new_code_flag)
2713 {
2714   flag_code = (enum flag_code) new_code_flag;
2715   if (flag_code != CODE_16BIT)
2716     abort ();
2717   cpu_arch_flags.bitfield.cpu64 = 0;
2718   cpu_arch_flags.bitfield.cpuno64 = 1;
2719   stackop_size = LONG_MNEM_SUFFIX;
2720 }
2721
2722 static void
2723 set_intel_syntax (int syntax_flag)
2724 {
2725   /* Find out if register prefixing is specified.  */
2726   int ask_naked_reg = 0;
2727
2728   SKIP_WHITESPACE ();
2729   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2730     {
2731       char *string;
2732       int e = get_symbol_name (&string);
2733
2734       if (strcmp (string, "prefix") == 0)
2735         ask_naked_reg = 1;
2736       else if (strcmp (string, "noprefix") == 0)
2737         ask_naked_reg = -1;
2738       else
2739         as_bad (_("bad argument to syntax directive."));
2740       (void) restore_line_pointer (e);
2741     }
2742   demand_empty_rest_of_line ();
2743
2744   intel_syntax = syntax_flag;
2745
2746   if (ask_naked_reg == 0)
2747     allow_naked_reg = (intel_syntax
2748                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2749   else
2750     allow_naked_reg = (ask_naked_reg < 0);
2751
2752   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2753
2754   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2755   identifier_chars['$'] = intel_syntax ? '$' : 0;
2756   register_prefix = allow_naked_reg ? "" : "%";
2757 }
2758
2759 static void
2760 set_intel_mnemonic (int mnemonic_flag)
2761 {
2762   intel_mnemonic = mnemonic_flag;
2763 }
2764
2765 static void
2766 set_allow_index_reg (int flag)
2767 {
2768   allow_index_reg = flag;
2769 }
2770
2771 static void
2772 set_check (int what)
2773 {
2774   enum check_kind *kind;
2775   const char *str;
2776
2777   if (what)
2778     {
2779       kind = &operand_check;
2780       str = "operand";
2781     }
2782   else
2783     {
2784       kind = &sse_check;
2785       str = "sse";
2786     }
2787
2788   SKIP_WHITESPACE ();
2789
2790   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2791     {
2792       char *string;
2793       int e = get_symbol_name (&string);
2794
2795       if (strcmp (string, "none") == 0)
2796         *kind = check_none;
2797       else if (strcmp (string, "warning") == 0)
2798         *kind = check_warning;
2799       else if (strcmp (string, "error") == 0)
2800         *kind = check_error;
2801       else
2802         as_bad (_("bad argument to %s_check directive."), str);
2803       (void) restore_line_pointer (e);
2804     }
2805   else
2806     as_bad (_("missing argument for %s_check directive"), str);
2807
2808   demand_empty_rest_of_line ();
2809 }
2810
2811 static void
2812 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2813                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2814 {
2815 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2816   static const char *arch;
2817
2818   /* Intel LIOM is only supported on ELF.  */
2819   if (!IS_ELF)
2820     return;
2821
2822   if (!arch)
2823     {
2824       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2825          use default_arch.  */
2826       arch = cpu_arch_name;
2827       if (!arch)
2828         arch = default_arch;
2829     }
2830
2831   /* If we are targeting Intel MCU, we must enable it.  */
2832   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
2833       || new_flag.bitfield.cpuiamcu)
2834     return;
2835
2836   /* If we are targeting Intel L1OM, we must enable it.  */
2837   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
2838       || new_flag.bitfield.cpul1om)
2839     return;
2840
2841   /* If we are targeting Intel K1OM, we must enable it.  */
2842   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
2843       || new_flag.bitfield.cpuk1om)
2844     return;
2845
2846   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2847 #endif
2848 }
2849
2850 static void
2851 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2852 {
2853   SKIP_WHITESPACE ();
2854
2855   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2856     {
2857       char *string;
2858       int e = get_symbol_name (&string);
2859       unsigned int j;
2860       i386_cpu_flags flags;
2861
2862       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2863         {
2864           if (strcmp (string, cpu_arch[j].name) == 0)
2865             {
2866               check_cpu_arch_compatible (string, cpu_arch[j].flags);
2867
2868               if (*string != '.')
2869                 {
2870                   cpu_arch_name = cpu_arch[j].name;
2871                   cpu_sub_arch_name = NULL;
2872                   cpu_arch_flags = cpu_arch[j].flags;
2873                   if (flag_code == CODE_64BIT)
2874                     {
2875                       cpu_arch_flags.bitfield.cpu64 = 1;
2876                       cpu_arch_flags.bitfield.cpuno64 = 0;
2877                     }
2878                   else
2879                     {
2880                       cpu_arch_flags.bitfield.cpu64 = 0;
2881                       cpu_arch_flags.bitfield.cpuno64 = 1;
2882                     }
2883                   cpu_arch_isa = cpu_arch[j].type;
2884                   cpu_arch_isa_flags = cpu_arch[j].flags;
2885                   if (!cpu_arch_tune_set)
2886                     {
2887                       cpu_arch_tune = cpu_arch_isa;
2888                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2889                     }
2890                   break;
2891                 }
2892
2893               flags = cpu_flags_or (cpu_arch_flags,
2894                                     cpu_arch[j].flags);
2895
2896               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2897                 {
2898                   if (cpu_sub_arch_name)
2899                     {
2900                       char *name = cpu_sub_arch_name;
2901                       cpu_sub_arch_name = concat (name,
2902                                                   cpu_arch[j].name,
2903                                                   (const char *) NULL);
2904                       free (name);
2905                     }
2906                   else
2907                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2908                   cpu_arch_flags = flags;
2909                   cpu_arch_isa_flags = flags;
2910                 }
2911               else
2912                 cpu_arch_isa_flags
2913                   = cpu_flags_or (cpu_arch_isa_flags,
2914                                   cpu_arch[j].flags);
2915               (void) restore_line_pointer (e);
2916               demand_empty_rest_of_line ();
2917               return;
2918             }
2919         }
2920
2921       if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2922         {
2923           /* Disable an ISA extension.  */
2924           for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2925             if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2926               {
2927                 flags = cpu_flags_and_not (cpu_arch_flags,
2928                                            cpu_noarch[j].flags);
2929                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2930                   {
2931                     if (cpu_sub_arch_name)
2932                       {
2933                         char *name = cpu_sub_arch_name;
2934                         cpu_sub_arch_name = concat (name, string,
2935                                                     (const char *) NULL);
2936                         free (name);
2937                       }
2938                     else
2939                       cpu_sub_arch_name = xstrdup (string);
2940                     cpu_arch_flags = flags;
2941                     cpu_arch_isa_flags = flags;
2942                   }
2943                 (void) restore_line_pointer (e);
2944                 demand_empty_rest_of_line ();
2945                 return;
2946               }
2947
2948           j = ARRAY_SIZE (cpu_arch);
2949         }
2950
2951       if (j >= ARRAY_SIZE (cpu_arch))
2952         as_bad (_("no such architecture: `%s'"), string);
2953
2954       *input_line_pointer = e;
2955     }
2956   else
2957     as_bad (_("missing cpu architecture"));
2958
2959   no_cond_jump_promotion = 0;
2960   if (*input_line_pointer == ','
2961       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2962     {
2963       char *string;
2964       char e;
2965
2966       ++input_line_pointer;
2967       e = get_symbol_name (&string);
2968
2969       if (strcmp (string, "nojumps") == 0)
2970         no_cond_jump_promotion = 1;
2971       else if (strcmp (string, "jumps") == 0)
2972         ;
2973       else
2974         as_bad (_("no such architecture modifier: `%s'"), string);
2975
2976       (void) restore_line_pointer (e);
2977     }
2978
2979   demand_empty_rest_of_line ();
2980 }
2981
2982 enum bfd_architecture
2983 i386_arch (void)
2984 {
2985   if (cpu_arch_isa == PROCESSOR_L1OM)
2986     {
2987       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2988           || flag_code != CODE_64BIT)
2989         as_fatal (_("Intel L1OM is 64bit ELF only"));
2990       return bfd_arch_l1om;
2991     }
2992   else if (cpu_arch_isa == PROCESSOR_K1OM)
2993     {
2994       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2995           || flag_code != CODE_64BIT)
2996         as_fatal (_("Intel K1OM is 64bit ELF only"));
2997       return bfd_arch_k1om;
2998     }
2999   else if (cpu_arch_isa == PROCESSOR_IAMCU)
3000     {
3001       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3002           || flag_code == CODE_64BIT)
3003         as_fatal (_("Intel MCU is 32bit ELF only"));
3004       return bfd_arch_iamcu;
3005     }
3006   else
3007     return bfd_arch_i386;
3008 }
3009
3010 unsigned long
3011 i386_mach (void)
3012 {
3013   if (startswith (default_arch, "x86_64"))
3014     {
3015       if (cpu_arch_isa == PROCESSOR_L1OM)
3016         {
3017           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3018               || default_arch[6] != '\0')
3019             as_fatal (_("Intel L1OM is 64bit ELF only"));
3020           return bfd_mach_l1om;
3021         }
3022       else if (cpu_arch_isa == PROCESSOR_K1OM)
3023         {
3024           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3025               || default_arch[6] != '\0')
3026             as_fatal (_("Intel K1OM is 64bit ELF only"));
3027           return bfd_mach_k1om;
3028         }
3029       else if (default_arch[6] == '\0')
3030         return bfd_mach_x86_64;
3031       else
3032         return bfd_mach_x64_32;
3033     }
3034   else if (!strcmp (default_arch, "i386")
3035            || !strcmp (default_arch, "iamcu"))
3036     {
3037       if (cpu_arch_isa == PROCESSOR_IAMCU)
3038         {
3039           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3040             as_fatal (_("Intel MCU is 32bit ELF only"));
3041           return bfd_mach_i386_iamcu;
3042         }
3043       else
3044         return bfd_mach_i386_i386;
3045     }
3046   else
3047     as_fatal (_("unknown architecture"));
3048 }
3049 \f
3050 void
3051 md_begin (void)
3052 {
3053   /* Support pseudo prefixes like {disp32}.  */
3054   lex_type ['{'] = LEX_BEGIN_NAME;
3055
3056   /* Initialize op_hash hash table.  */
3057   op_hash = str_htab_create ();
3058
3059   {
3060     const insn_template *optab;
3061     templates *core_optab;
3062
3063     /* Setup for loop.  */
3064     optab = i386_optab;
3065     core_optab = XNEW (templates);
3066     core_optab->start = optab;
3067
3068     while (1)
3069       {
3070         ++optab;
3071         if (optab->name == NULL
3072             || strcmp (optab->name, (optab - 1)->name) != 0)
3073           {
3074             /* different name --> ship out current template list;
3075                add to hash table; & begin anew.  */
3076             core_optab->end = optab;
3077             if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
3078               as_fatal (_("duplicate %s"), (optab - 1)->name);
3079
3080             if (optab->name == NULL)
3081               break;
3082             core_optab = XNEW (templates);
3083             core_optab->start = optab;
3084           }
3085       }
3086   }
3087
3088   /* Initialize reg_hash hash table.  */
3089   reg_hash = str_htab_create ();
3090   {
3091     const reg_entry *regtab;
3092     unsigned int regtab_size = i386_regtab_size;
3093
3094     for (regtab = i386_regtab; regtab_size--; regtab++)
3095       {
3096         switch (regtab->reg_type.bitfield.class)
3097           {
3098           case Reg:
3099             if (regtab->reg_type.bitfield.dword)
3100               {
3101                 if (regtab->reg_type.bitfield.instance == Accum)
3102                   reg_eax = regtab;
3103               }
3104             else if (regtab->reg_type.bitfield.tbyte)
3105               {
3106                 /* There's no point inserting st(<N>) in the hash table, as
3107                    parentheses aren't included in register_chars[] anyway.  */
3108                 if (regtab->reg_type.bitfield.instance != Accum)
3109                   continue;
3110                 reg_st0 = regtab;
3111               }
3112             break;
3113
3114           case SReg:
3115             switch (regtab->reg_num)
3116               {
3117               case 0: reg_es = regtab; break;
3118               case 2: reg_ss = regtab; break;
3119               case 3: reg_ds = regtab; break;
3120               }
3121             break;
3122
3123           case RegMask:
3124             if (!regtab->reg_num)
3125               reg_k0 = regtab;
3126             break;
3127           }
3128
3129         if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3130           as_fatal (_("duplicate %s"), regtab->reg_name);
3131       }
3132   }
3133
3134   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3135   {
3136     int c;
3137     char *p;
3138
3139     for (c = 0; c < 256; c++)
3140       {
3141         if (ISDIGIT (c) || ISLOWER (c))
3142           {
3143             mnemonic_chars[c] = c;
3144             register_chars[c] = c;
3145             operand_chars[c] = c;
3146           }
3147         else if (ISUPPER (c))
3148           {
3149             mnemonic_chars[c] = TOLOWER (c);
3150             register_chars[c] = mnemonic_chars[c];
3151             operand_chars[c] = c;
3152           }
3153         else if (c == '{' || c == '}')
3154           {
3155             mnemonic_chars[c] = c;
3156             operand_chars[c] = c;
3157           }
3158 #ifdef SVR4_COMMENT_CHARS
3159         else if (c == '\\' && strchr (i386_comment_chars, '/'))
3160           operand_chars[c] = c;
3161 #endif
3162
3163         if (ISALPHA (c) || ISDIGIT (c))
3164           identifier_chars[c] = c;
3165         else if (c >= 128)
3166           {
3167             identifier_chars[c] = c;
3168             operand_chars[c] = c;
3169           }
3170       }
3171
3172 #ifdef LEX_AT
3173     identifier_chars['@'] = '@';
3174 #endif
3175 #ifdef LEX_QM
3176     identifier_chars['?'] = '?';
3177     operand_chars['?'] = '?';
3178 #endif
3179     mnemonic_chars['_'] = '_';
3180     mnemonic_chars['-'] = '-';
3181     mnemonic_chars['.'] = '.';
3182     identifier_chars['_'] = '_';
3183     identifier_chars['.'] = '.';
3184
3185     for (p = operand_special_chars; *p != '\0'; p++)
3186       operand_chars[(unsigned char) *p] = *p;
3187   }
3188
3189   if (flag_code == CODE_64BIT)
3190     {
3191 #if defined (OBJ_COFF) && defined (TE_PE)
3192       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3193                                   ? 32 : 16);
3194 #else
3195       x86_dwarf2_return_column = 16;
3196 #endif
3197       x86_cie_data_alignment = -8;
3198     }
3199   else
3200     {
3201       x86_dwarf2_return_column = 8;
3202       x86_cie_data_alignment = -4;
3203     }
3204
3205   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3206      can be turned into BRANCH_PREFIX frag.  */
3207   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3208     abort ();
3209 }
3210
3211 void
3212 i386_print_statistics (FILE *file)
3213 {
3214   htab_print_statistics (file, "i386 opcode", op_hash);
3215   htab_print_statistics (file, "i386 register", reg_hash);
3216 }
3217 \f
3218 #ifdef DEBUG386
3219
3220 /* Debugging routines for md_assemble.  */
3221 static void pte (insn_template *);
3222 static void pt (i386_operand_type);
3223 static void pe (expressionS *);
3224 static void ps (symbolS *);
3225
3226 static void
3227 pi (const char *line, i386_insn *x)
3228 {
3229   unsigned int j;
3230
3231   fprintf (stdout, "%s: template ", line);
3232   pte (&x->tm);
3233   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3234            x->base_reg ? x->base_reg->reg_name : "none",
3235            x->index_reg ? x->index_reg->reg_name : "none",
3236            x->log2_scale_factor);
3237   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3238            x->rm.mode, x->rm.reg, x->rm.regmem);
3239   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3240            x->sib.base, x->sib.index, x->sib.scale);
3241   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3242            (x->rex & REX_W) != 0,
3243            (x->rex & REX_R) != 0,
3244            (x->rex & REX_X) != 0,
3245            (x->rex & REX_B) != 0);
3246   for (j = 0; j < x->operands; j++)
3247     {
3248       fprintf (stdout, "    #%d:  ", j + 1);
3249       pt (x->types[j]);
3250       fprintf (stdout, "\n");
3251       if (x->types[j].bitfield.class == Reg
3252           || x->types[j].bitfield.class == RegMMX
3253           || x->types[j].bitfield.class == RegSIMD
3254           || x->types[j].bitfield.class == RegMask
3255           || x->types[j].bitfield.class == SReg
3256           || x->types[j].bitfield.class == RegCR
3257           || x->types[j].bitfield.class == RegDR
3258           || x->types[j].bitfield.class == RegTR
3259           || x->types[j].bitfield.class == RegBND)
3260         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3261       if (operand_type_check (x->types[j], imm))
3262         pe (x->op[j].imms);
3263       if (operand_type_check (x->types[j], disp))
3264         pe (x->op[j].disps);
3265     }
3266 }
3267
3268 static void
3269 pte (insn_template *t)
3270 {
3271   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3272   static const char *const opc_spc[] = {
3273     NULL, "0f", "0f38", "0f3a", NULL, NULL, NULL, NULL,
3274     "XOP08", "XOP09", "XOP0A",
3275   };
3276   unsigned int j;
3277
3278   fprintf (stdout, " %d operands ", t->operands);
3279   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3280     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3281   if (opc_spc[t->opcode_modifier.opcodespace])
3282     fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3283   fprintf (stdout, "opcode %x ", t->base_opcode);
3284   if (t->extension_opcode != None)
3285     fprintf (stdout, "ext %x ", t->extension_opcode);
3286   if (t->opcode_modifier.d)
3287     fprintf (stdout, "D");
3288   if (t->opcode_modifier.w)
3289     fprintf (stdout, "W");
3290   fprintf (stdout, "\n");
3291   for (j = 0; j < t->operands; j++)
3292     {
3293       fprintf (stdout, "    #%d type ", j + 1);
3294       pt (t->operand_types[j]);
3295       fprintf (stdout, "\n");
3296     }
3297 }
3298
3299 static void
3300 pe (expressionS *e)
3301 {
3302   fprintf (stdout, "    operation     %d\n", e->X_op);
3303   fprintf (stdout, "    add_number    %" BFD_VMA_FMT "d (%" BFD_VMA_FMT "x)\n",
3304            e->X_add_number, e->X_add_number);
3305   if (e->X_add_symbol)
3306     {
3307       fprintf (stdout, "    add_symbol    ");
3308       ps (e->X_add_symbol);
3309       fprintf (stdout, "\n");
3310     }
3311   if (e->X_op_symbol)
3312     {
3313       fprintf (stdout, "    op_symbol    ");
3314       ps (e->X_op_symbol);
3315       fprintf (stdout, "\n");
3316     }
3317 }
3318
3319 static void
3320 ps (symbolS *s)
3321 {
3322   fprintf (stdout, "%s type %s%s",
3323            S_GET_NAME (s),
3324            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3325            segment_name (S_GET_SEGMENT (s)));
3326 }
3327
3328 static struct type_name
3329   {
3330     i386_operand_type mask;
3331     const char *name;
3332   }
3333 const type_names[] =
3334 {
3335   { OPERAND_TYPE_REG8, "r8" },
3336   { OPERAND_TYPE_REG16, "r16" },
3337   { OPERAND_TYPE_REG32, "r32" },
3338   { OPERAND_TYPE_REG64, "r64" },
3339   { OPERAND_TYPE_ACC8, "acc8" },
3340   { OPERAND_TYPE_ACC16, "acc16" },
3341   { OPERAND_TYPE_ACC32, "acc32" },
3342   { OPERAND_TYPE_ACC64, "acc64" },
3343   { OPERAND_TYPE_IMM8, "i8" },
3344   { OPERAND_TYPE_IMM8, "i8s" },
3345   { OPERAND_TYPE_IMM16, "i16" },
3346   { OPERAND_TYPE_IMM32, "i32" },
3347   { OPERAND_TYPE_IMM32S, "i32s" },
3348   { OPERAND_TYPE_IMM64, "i64" },
3349   { OPERAND_TYPE_IMM1, "i1" },
3350   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3351   { OPERAND_TYPE_DISP8, "d8" },
3352   { OPERAND_TYPE_DISP16, "d16" },
3353   { OPERAND_TYPE_DISP32, "d32" },
3354   { OPERAND_TYPE_DISP32S, "d32s" },
3355   { OPERAND_TYPE_DISP64, "d64" },
3356   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3357   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3358   { OPERAND_TYPE_CONTROL, "control reg" },
3359   { OPERAND_TYPE_TEST, "test reg" },
3360   { OPERAND_TYPE_DEBUG, "debug reg" },
3361   { OPERAND_TYPE_FLOATREG, "FReg" },
3362   { OPERAND_TYPE_FLOATACC, "FAcc" },
3363   { OPERAND_TYPE_SREG, "SReg" },
3364   { OPERAND_TYPE_REGMMX, "rMMX" },
3365   { OPERAND_TYPE_REGXMM, "rXMM" },
3366   { OPERAND_TYPE_REGYMM, "rYMM" },
3367   { OPERAND_TYPE_REGZMM, "rZMM" },
3368   { OPERAND_TYPE_REGTMM, "rTMM" },
3369   { OPERAND_TYPE_REGMASK, "Mask reg" },
3370 };
3371
3372 static void
3373 pt (i386_operand_type t)
3374 {
3375   unsigned int j;
3376   i386_operand_type a;
3377
3378   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3379     {
3380       a = operand_type_and (t, type_names[j].mask);
3381       if (operand_type_equal (&a, &type_names[j].mask))
3382         fprintf (stdout, "%s, ",  type_names[j].name);
3383     }
3384   fflush (stdout);
3385 }
3386
3387 #endif /* DEBUG386 */
3388 \f
3389 static bfd_reloc_code_real_type
3390 reloc (unsigned int size,
3391        int pcrel,
3392        int sign,
3393        bfd_reloc_code_real_type other)
3394 {
3395   if (other != NO_RELOC)
3396     {
3397       reloc_howto_type *rel;
3398
3399       if (size == 8)
3400         switch (other)
3401           {
3402           case BFD_RELOC_X86_64_GOT32:
3403             return BFD_RELOC_X86_64_GOT64;
3404             break;
3405           case BFD_RELOC_X86_64_GOTPLT64:
3406             return BFD_RELOC_X86_64_GOTPLT64;
3407             break;
3408           case BFD_RELOC_X86_64_PLTOFF64:
3409             return BFD_RELOC_X86_64_PLTOFF64;
3410             break;
3411           case BFD_RELOC_X86_64_GOTPC32:
3412             other = BFD_RELOC_X86_64_GOTPC64;
3413             break;
3414           case BFD_RELOC_X86_64_GOTPCREL:
3415             other = BFD_RELOC_X86_64_GOTPCREL64;
3416             break;
3417           case BFD_RELOC_X86_64_TPOFF32:
3418             other = BFD_RELOC_X86_64_TPOFF64;
3419             break;
3420           case BFD_RELOC_X86_64_DTPOFF32:
3421             other = BFD_RELOC_X86_64_DTPOFF64;
3422             break;
3423           default:
3424             break;
3425           }
3426
3427 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3428       if (other == BFD_RELOC_SIZE32)
3429         {
3430           if (size == 8)
3431             other = BFD_RELOC_SIZE64;
3432           if (pcrel)
3433             {
3434               as_bad (_("there are no pc-relative size relocations"));
3435               return NO_RELOC;
3436             }
3437         }
3438 #endif
3439
3440       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3441       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3442         sign = -1;
3443
3444       rel = bfd_reloc_type_lookup (stdoutput, other);
3445       if (!rel)
3446         as_bad (_("unknown relocation (%u)"), other);
3447       else if (size != bfd_get_reloc_size (rel))
3448         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3449                 bfd_get_reloc_size (rel),
3450                 size);
3451       else if (pcrel && !rel->pc_relative)
3452         as_bad (_("non-pc-relative relocation for pc-relative field"));
3453       else if ((rel->complain_on_overflow == complain_overflow_signed
3454                 && !sign)
3455                || (rel->complain_on_overflow == complain_overflow_unsigned
3456                    && sign > 0))
3457         as_bad (_("relocated field and relocation type differ in signedness"));
3458       else
3459         return other;
3460       return NO_RELOC;
3461     }
3462
3463   if (pcrel)
3464     {
3465       if (!sign)
3466         as_bad (_("there are no unsigned pc-relative relocations"));
3467       switch (size)
3468         {
3469         case 1: return BFD_RELOC_8_PCREL;
3470         case 2: return BFD_RELOC_16_PCREL;
3471         case 4: return BFD_RELOC_32_PCREL;
3472         case 8: return BFD_RELOC_64_PCREL;
3473         }
3474       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3475     }
3476   else
3477     {
3478       if (sign > 0)
3479         switch (size)
3480           {
3481           case 4: return BFD_RELOC_X86_64_32S;
3482           }
3483       else
3484         switch (size)
3485           {
3486           case 1: return BFD_RELOC_8;
3487           case 2: return BFD_RELOC_16;
3488           case 4: return BFD_RELOC_32;
3489           case 8: return BFD_RELOC_64;
3490           }
3491       as_bad (_("cannot do %s %u byte relocation"),
3492               sign > 0 ? "signed" : "unsigned", size);
3493     }
3494
3495   return NO_RELOC;
3496 }
3497
3498 /* Here we decide which fixups can be adjusted to make them relative to
3499    the beginning of the section instead of the symbol.  Basically we need
3500    to make sure that the dynamic relocations are done correctly, so in
3501    some cases we force the original symbol to be used.  */
3502
3503 int
3504 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3505 {
3506 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3507   if (!IS_ELF)
3508     return 1;
3509
3510   /* Don't adjust pc-relative references to merge sections in 64-bit
3511      mode.  */
3512   if (use_rela_relocations
3513       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3514       && fixP->fx_pcrel)
3515     return 0;
3516
3517   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3518      and changed later by validate_fix.  */
3519   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3520       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3521     return 0;
3522
3523   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3524      for size relocations.  */
3525   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3526       || fixP->fx_r_type == BFD_RELOC_SIZE64
3527       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3528       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3529       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3530       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3531       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3532       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3533       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3534       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3535       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3536       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3537       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3538       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3539       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3540       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3541       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3542       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3543       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3544       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3545       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3546       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3547       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3548       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3549       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3550       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3551       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3552       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3553       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3554       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3555       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3556     return 0;
3557 #endif
3558   return 1;
3559 }
3560
3561 static INLINE bool
3562 want_disp32 (const insn_template *t)
3563 {
3564   return flag_code != CODE_64BIT
3565          || i.prefix[ADDR_PREFIX]
3566          || (t->base_opcode == 0x8d
3567              && t->opcode_modifier.opcodespace == SPACE_BASE
3568              && (!i.types[1].bitfield.qword
3569                 || t->opcode_modifier.size == SIZE32));
3570 }
3571
/* Classify MNEMONIC for Intel-syntax operand handling.  Returns
     0  for non-math mnemonics (not starting with 'f', or fxsave/fxrstor),
     2  for integer ops (second letter 'i'),
     3  for control ops (fldcw/fldenv, non-waiting "fn..." ops other than
        fnop, frstor/frstpm, fsave, fstcw/fstdw/fstenv/fsts[gw]),
     1  for every other 'f' mnemonic.

   Note that the value returned is meaningful only for opcodes with
   (memory) operands, hence the code here is free to improperly handle
   opcodes that have no operands (for better performance and smaller
   code).  fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm,
   and the fs segment override prefix are not currently handled because
   no call path can make opcodes without operands get here.  */

static int
intel_float_operand (const char *mnemonic)
{
  char second;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  second = mnemonic[1];

  if (second == 'i')
    return 2; /* integer op */

  if (second == 'l')
    {
      /* fldcw/fldenv */
      if (mnemonic[2] == 'd'
          && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3;
    }
  else if (second == 'n')
    {
      /* non-waiting control op, except fnop */
      if (mnemonic[2] != 'o')
        return 3;
    }
  else if (second == 'r')
    {
      /* frstor/frstpm */
      if (mnemonic[2] == 's')
        return 3;
    }
  else if (second == 's')
    {
      /* fsave */
      if (mnemonic[2] == 'a')
        return 3;

      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (mnemonic[2] == 't'
          && (mnemonic[3] == 'c'
              || mnemonic[3] == 'd'
              || mnemonic[3] == 'e'
              || mnemonic[3] == 's'))
        return 3;
    }
  else if (second == 'x')
    {
      /* fxsave/fxrstor are not really math ops */
      if (mnemonic[2] == 'r' || mnemonic[2] == 's')
        return 0;
    }

  return 1;
}
3624
3625 static INLINE void
3626 install_template (const insn_template *t)
3627 {
3628   unsigned int l;
3629
3630   i.tm = *t;
3631
3632   /* Note that for pseudo prefixes this produces a length of 1. But for them
3633      the length isn't interesting at all.  */
3634   for (l = 1; l < 4; ++l)
3635     if (!(t->base_opcode >> (8 * l)))
3636       break;
3637
3638   i.opcode_length = l;
3639 }
3640
3641 /* Build the VEX prefix.  */
3642
3643 static void
3644 build_vex_prefix (const insn_template *t)
3645 {
3646   unsigned int register_specifier;
3647   unsigned int vector_length;
3648   unsigned int w;
3649
3650   /* Check register specifier.  */
3651   if (i.vex.register_specifier)
3652     {
3653       register_specifier =
3654         ~register_number (i.vex.register_specifier) & 0xf;
3655       gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3656     }
3657   else
3658     register_specifier = 0xf;
3659
3660   /* Use 2-byte VEX prefix by swapping destination and source operand
3661      if there are more than 1 register operand.  */
3662   if (i.reg_operands > 1
3663       && i.vec_encoding != vex_encoding_vex3
3664       && i.dir_encoding == dir_encoding_default
3665       && i.operands == i.reg_operands
3666       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3667       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3668       && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3669       && i.rex == REX_B)
3670     {
3671       unsigned int xchg = i.operands - 1;
3672       union i386_op temp_op;
3673       i386_operand_type temp_type;
3674
3675       temp_type = i.types[xchg];
3676       i.types[xchg] = i.types[0];
3677       i.types[0] = temp_type;
3678       temp_op = i.op[xchg];
3679       i.op[xchg] = i.op[0];
3680       i.op[0] = temp_op;
3681
3682       gas_assert (i.rm.mode == 3);
3683
3684       i.rex = REX_R;
3685       xchg = i.rm.regmem;
3686       i.rm.regmem = i.rm.reg;
3687       i.rm.reg = xchg;
3688
3689       if (i.tm.opcode_modifier.d)
3690         i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3691                             ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
3692       else /* Use the next insn.  */
3693         install_template (&t[1]);
3694     }
3695
3696   /* Use 2-byte VEX prefix by swapping commutative source operands if there
3697      are no memory operands and at least 3 register ones.  */
3698   if (i.reg_operands >= 3
3699       && i.vec_encoding != vex_encoding_vex3
3700       && i.reg_operands == i.operands - i.imm_operands
3701       && i.tm.opcode_modifier.vex
3702       && i.tm.opcode_modifier.commutative
3703       && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3704       && i.rex == REX_B
3705       && i.vex.register_specifier
3706       && !(i.vex.register_specifier->reg_flags & RegRex))
3707     {
3708       unsigned int xchg = i.operands - i.reg_operands;
3709       union i386_op temp_op;
3710       i386_operand_type temp_type;
3711
3712       gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3713       gas_assert (!i.tm.opcode_modifier.sae);
3714       gas_assert (operand_type_equal (&i.types[i.operands - 2],
3715                                       &i.types[i.operands - 3]));
3716       gas_assert (i.rm.mode == 3);
3717
3718       temp_type = i.types[xchg];
3719       i.types[xchg] = i.types[xchg + 1];
3720       i.types[xchg + 1] = temp_type;
3721       temp_op = i.op[xchg];
3722       i.op[xchg] = i.op[xchg + 1];
3723       i.op[xchg + 1] = temp_op;
3724
3725       i.rex = 0;
3726       xchg = i.rm.regmem | 8;
3727       i.rm.regmem = ~register_specifier & 0xf;
3728       gas_assert (!(i.rm.regmem & 8));
3729       i.vex.register_specifier += xchg - i.rm.regmem;
3730       register_specifier = ~xchg & 0xf;
3731     }
3732
3733   if (i.tm.opcode_modifier.vex == VEXScalar)
3734     vector_length = avxscalar;
3735   else if (i.tm.opcode_modifier.vex == VEX256)
3736     vector_length = 1;
3737   else
3738     {
3739       unsigned int op;
3740
3741       /* Determine vector length from the last multi-length vector
3742          operand.  */
3743       vector_length = 0;
3744       for (op = t->operands; op--;)
3745         if (t->operand_types[op].bitfield.xmmword
3746             && t->operand_types[op].bitfield.ymmword
3747             && i.types[op].bitfield.ymmword)
3748           {
3749             vector_length = 1;
3750             break;
3751           }
3752     }
3753
3754   /* Check the REX.W bit and VEXW.  */
3755   if (i.tm.opcode_modifier.vexw == VEXWIG)
3756     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3757   else if (i.tm.opcode_modifier.vexw)
3758     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3759   else
3760     w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3761
3762   /* Use 2-byte VEX prefix if possible.  */
3763   if (w == 0
3764       && i.vec_encoding != vex_encoding_vex3
3765       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3766       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3767     {
3768       /* 2-byte VEX prefix.  */
3769       unsigned int r;
3770
3771       i.vex.length = 2;
3772       i.vex.bytes[0] = 0xc5;
3773
3774       /* Check the REX.R bit.  */
3775       r = (i.rex & REX_R) ? 0 : 1;
3776       i.vex.bytes[1] = (r << 7
3777                         | register_specifier << 3
3778                         | vector_length << 2
3779                         | i.tm.opcode_modifier.opcodeprefix);
3780     }
3781   else
3782     {
3783       /* 3-byte VEX prefix.  */
3784       i.vex.length = 3;
3785
3786       switch (i.tm.opcode_modifier.opcodespace)
3787         {
3788         case SPACE_0F:
3789         case SPACE_0F38:
3790         case SPACE_0F3A:
3791           i.vex.bytes[0] = 0xc4;
3792           break;
3793         case SPACE_XOP08:
3794         case SPACE_XOP09:
3795         case SPACE_XOP0A:
3796           i.vex.bytes[0] = 0x8f;
3797           break;
3798         default:
3799           abort ();
3800         }
3801
3802       /* The high 3 bits of the second VEX byte are 1's compliment
3803          of RXB bits from REX.  */
3804       i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3805
3806       i.vex.bytes[2] = (w << 7
3807                         | register_specifier << 3
3808                         | vector_length << 2
3809                         | i.tm.opcode_modifier.opcodeprefix);
3810     }
3811 }
3812
3813 static INLINE bool
3814 is_evex_encoding (const insn_template *t)
3815 {
3816   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3817          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3818          || t->opcode_modifier.sae;
3819 }
3820
3821 static INLINE bool
3822 is_any_vex_encoding (const insn_template *t)
3823 {
3824   return t->opcode_modifier.vex || is_evex_encoding (t);
3825 }
3826
3827 /* Build the EVEX prefix.  */
3828
3829 static void
3830 build_evex_prefix (void)
3831 {
3832   unsigned int register_specifier, w;
3833   rex_byte vrex_used = 0;
3834
3835   /* Check register specifier.  */
3836   if (i.vex.register_specifier)
3837     {
3838       gas_assert ((i.vrex & REX_X) == 0);
3839
3840       register_specifier = i.vex.register_specifier->reg_num;
3841       if ((i.vex.register_specifier->reg_flags & RegRex))
3842         register_specifier += 8;
3843       /* The upper 16 registers are encoded in the fourth byte of the
3844          EVEX prefix.  */
3845       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3846         i.vex.bytes[3] = 0x8;
3847       register_specifier = ~register_specifier & 0xf;
3848     }
3849   else
3850     {
3851       register_specifier = 0xf;
3852
3853       /* Encode upper 16 vector index register in the fourth byte of
3854          the EVEX prefix.  */
3855       if (!(i.vrex & REX_X))
3856         i.vex.bytes[3] = 0x8;
3857       else
3858         vrex_used |= REX_X;
3859     }
3860
3861   /* 4 byte EVEX prefix.  */
3862   i.vex.length = 4;
3863   i.vex.bytes[0] = 0x62;
3864
3865   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
3866      bits from REX.  */
3867   gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3868   gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_0F3A);
3869   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3870
3871   /* The fifth bit of the second EVEX byte is 1's compliment of the
3872      REX_R bit in VREX.  */
3873   if (!(i.vrex & REX_R))
3874     i.vex.bytes[1] |= 0x10;
3875   else
3876     vrex_used |= REX_R;
3877
3878   if ((i.reg_operands + i.imm_operands) == i.operands)
3879     {
3880       /* When all operands are registers, the REX_X bit in REX is not
3881          used.  We reuse it to encode the upper 16 registers, which is
3882          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3883          as 1's compliment.  */
3884       if ((i.vrex & REX_B))
3885         {
3886           vrex_used |= REX_B;
3887           i.vex.bytes[1] &= ~0x40;
3888         }
3889     }
3890
3891   /* EVEX instructions shouldn't need the REX prefix.  */
3892   i.vrex &= ~vrex_used;
3893   gas_assert (i.vrex == 0);
3894
3895   /* Check the REX.W bit and VEXW.  */
3896   if (i.tm.opcode_modifier.vexw == VEXWIG)
3897     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3898   else if (i.tm.opcode_modifier.vexw)
3899     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3900   else
3901     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3902
3903   /* The third byte of the EVEX prefix.  */
3904   i.vex.bytes[2] = ((w << 7)
3905                     | (register_specifier << 3)
3906                     | 4 /* Encode the U bit.  */
3907                     | i.tm.opcode_modifier.opcodeprefix);
3908
3909   /* The fourth byte of the EVEX prefix.  */
3910   /* The zeroing-masking bit.  */
3911   if (i.mask.reg && i.mask.zeroing)
3912     i.vex.bytes[3] |= 0x80;
3913
3914   /* Don't always set the broadcast bit if there is no RC.  */
3915   if (i.rounding.type == rc_none)
3916     {
3917       /* Encode the vector length.  */
3918       unsigned int vec_length;
3919
3920       if (!i.tm.opcode_modifier.evex
3921           || i.tm.opcode_modifier.evex == EVEXDYN)
3922         {
3923           unsigned int op;
3924
3925           /* Determine vector length from the last multi-length vector
3926              operand.  */
3927           for (op = i.operands; op--;)
3928             if (i.tm.operand_types[op].bitfield.xmmword
3929                 + i.tm.operand_types[op].bitfield.ymmword
3930                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3931               {
3932                 if (i.types[op].bitfield.zmmword)
3933                   {
3934                     i.tm.opcode_modifier.evex = EVEX512;
3935                     break;
3936                   }
3937                 else if (i.types[op].bitfield.ymmword)
3938                   {
3939                     i.tm.opcode_modifier.evex = EVEX256;
3940                     break;
3941                   }
3942                 else if (i.types[op].bitfield.xmmword)
3943                   {
3944                     i.tm.opcode_modifier.evex = EVEX128;
3945                     break;
3946                   }
3947                 else if (i.broadcast.type && op == i.broadcast.operand)
3948                   {
3949                     switch (i.broadcast.bytes)
3950                       {
3951                         case 64:
3952                           i.tm.opcode_modifier.evex = EVEX512;
3953                           break;
3954                         case 32:
3955                           i.tm.opcode_modifier.evex = EVEX256;
3956                           break;
3957                         case 16:
3958                           i.tm.opcode_modifier.evex = EVEX128;
3959                           break;
3960                         default:
3961                           abort ();
3962                       }
3963                     break;
3964                   }
3965               }
3966
3967           if (op >= MAX_OPERANDS)
3968             abort ();
3969         }
3970
3971       switch (i.tm.opcode_modifier.evex)
3972         {
3973         case EVEXLIG: /* LL' is ignored */
3974           vec_length = evexlig << 5;
3975           break;
3976         case EVEX128:
3977           vec_length = 0 << 5;
3978           break;
3979         case EVEX256:
3980           vec_length = 1 << 5;
3981           break;
3982         case EVEX512:
3983           vec_length = 2 << 5;
3984           break;
3985         default:
3986           abort ();
3987           break;
3988         }
3989       i.vex.bytes[3] |= vec_length;
3990       /* Encode the broadcast bit.  */
3991       if (i.broadcast.type)
3992         i.vex.bytes[3] |= 0x10;
3993     }
3994   else if (i.rounding.type != saeonly)
3995     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3996   else
3997     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3998
3999   if (i.mask.reg)
4000     i.vex.bytes[3] |= i.mask.reg->reg_num;
4001 }
4002
4003 static void
4004 process_immext (void)
4005 {
4006   expressionS *exp;
4007
4008   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4009      which is coded in the same place as an 8-bit immediate field
4010      would be.  Here we fake an 8-bit immediate operand from the
4011      opcode suffix stored in tm.extension_opcode.
4012
4013      AVX instructions also use this encoding, for some of
4014      3 argument instructions.  */
4015
4016   gas_assert (i.imm_operands <= 1
4017               && (i.operands <= 2
4018                   || (is_any_vex_encoding (&i.tm)
4019                       && i.operands <= 4)));
4020
4021   exp = &im_expressions[i.imm_operands++];
4022   i.op[i.operands].imms = exp;
4023   i.types[i.operands] = imm8;
4024   i.operands++;
4025   exp->X_op = O_constant;
4026   exp->X_add_number = i.tm.extension_opcode;
4027   i.tm.extension_opcode = None;
4028 }
4029
4030
4031 static int
4032 check_hle (void)
4033 {
4034   switch (i.tm.opcode_modifier.prefixok)
4035     {
4036     default:
4037       abort ();
4038     case PrefixLock:
4039     case PrefixNone:
4040     case PrefixNoTrack:
4041     case PrefixRep:
4042       as_bad (_("invalid instruction `%s' after `%s'"),
4043               i.tm.name, i.hle_prefix);
4044       return 0;
4045     case PrefixHLELock:
4046       if (i.prefix[LOCK_PREFIX])
4047         return 1;
4048       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4049       return 0;
4050     case PrefixHLEAny:
4051       return 1;
4052     case PrefixHLERelease:
4053       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4054         {
4055           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4056                   i.tm.name);
4057           return 0;
4058         }
4059       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4060         {
4061           as_bad (_("memory destination needed for instruction `%s'"
4062                     " after `xrelease'"), i.tm.name);
4063           return 0;
4064         }
4065       return 1;
4066     }
4067 }
4068
4069 /* Try the shortest encoding by shortening operand size.  */
4070
4071 static void
4072 optimize_encoding (void)
4073 {
4074   unsigned int j;
4075
4076   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4077       && i.tm.base_opcode == 0x8d)
4078     {
4079       /* Optimize: -O:
4080            lea symbol, %rN    -> mov $symbol, %rN
4081            lea (%rM), %rN     -> mov %rM, %rN
4082            lea (,%rM,1), %rN  -> mov %rM, %rN
4083
4084            and in 32-bit mode for 16-bit addressing
4085
4086            lea (%rM), %rN     -> movzx %rM, %rN
4087
4088            and in 64-bit mode zap 32-bit addressing in favor of using a
4089            32-bit (or less) destination.
4090        */
4091       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4092         {
4093           if (!i.op[1].regs->reg_type.bitfield.word)
4094             i.tm.opcode_modifier.size = SIZE32;
4095           i.prefix[ADDR_PREFIX] = 0;
4096         }
4097
4098       if (!i.index_reg && !i.base_reg)
4099         {
4100           /* Handle:
4101                lea symbol, %rN    -> mov $symbol, %rN
4102            */
4103           if (flag_code == CODE_64BIT)
4104             {
4105               /* Don't transform a relocation to a 16-bit one.  */
4106               if (i.op[0].disps
4107                   && i.op[0].disps->X_op != O_constant
4108                   && i.op[1].regs->reg_type.bitfield.word)
4109                 return;
4110
4111               if (!i.op[1].regs->reg_type.bitfield.qword
4112                   || i.tm.opcode_modifier.size == SIZE32)
4113                 {
4114                   i.tm.base_opcode = 0xb8;
4115                   i.tm.opcode_modifier.modrm = 0;
4116                   if (!i.op[1].regs->reg_type.bitfield.word)
4117                     i.types[0].bitfield.imm32 = 1;
4118                   else
4119                     {
4120                       i.tm.opcode_modifier.size = SIZE16;
4121                       i.types[0].bitfield.imm16 = 1;
4122                     }
4123                 }
4124               else
4125                 {
4126                   /* Subject to further optimization below.  */
4127                   i.tm.base_opcode = 0xc7;
4128                   i.tm.extension_opcode = 0;
4129                   i.types[0].bitfield.imm32s = 1;
4130                   i.types[0].bitfield.baseindex = 0;
4131                 }
4132             }
4133           /* Outside of 64-bit mode address and operand sizes have to match if
4134              a relocation is involved, as otherwise we wouldn't (currently) or
4135              even couldn't express the relocation correctly.  */
4136           else if (i.op[0].disps
4137                    && i.op[0].disps->X_op != O_constant
4138                    && ((!i.prefix[ADDR_PREFIX])
4139                        != (flag_code == CODE_32BIT
4140                            ? i.op[1].regs->reg_type.bitfield.dword
4141                            : i.op[1].regs->reg_type.bitfield.word)))
4142             return;
4143           /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4144              destination is going to grow encoding size.  */
4145           else if (flag_code == CODE_16BIT
4146                    && (optimize <= 1 || optimize_for_space)
4147                    && !i.prefix[ADDR_PREFIX]
4148                    && i.op[1].regs->reg_type.bitfield.dword)
4149             return;
4150           else
4151             {
4152               i.tm.base_opcode = 0xb8;
4153               i.tm.opcode_modifier.modrm = 0;
4154               if (i.op[1].regs->reg_type.bitfield.dword)
4155                 i.types[0].bitfield.imm32 = 1;
4156               else
4157                 i.types[0].bitfield.imm16 = 1;
4158
4159               if (i.op[0].disps
4160                   && i.op[0].disps->X_op == O_constant
4161                   && i.op[1].regs->reg_type.bitfield.dword
4162                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4163                      GCC 5. */
4164                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4165                 i.op[0].disps->X_add_number &= 0xffff;
4166             }
4167
4168           i.tm.operand_types[0] = i.types[0];
4169           i.imm_operands = 1;
4170           if (!i.op[0].imms)
4171             {
4172               i.op[0].imms = &im_expressions[0];
4173               i.op[0].imms->X_op = O_absent;
4174             }
4175         }
4176       else if (i.op[0].disps
4177                   && (i.op[0].disps->X_op != O_constant
4178                       || i.op[0].disps->X_add_number))
4179         return;
4180       else
4181         {
4182           /* Handle:
4183                lea (%rM), %rN     -> mov %rM, %rN
4184                lea (,%rM,1), %rN  -> mov %rM, %rN
4185                lea (%rM), %rN     -> movzx %rM, %rN
4186            */
4187           const reg_entry *addr_reg;
4188
4189           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4190             addr_reg = i.base_reg;
4191           else if (!i.base_reg
4192                    && i.index_reg->reg_num != RegIZ
4193                    && !i.log2_scale_factor)
4194             addr_reg = i.index_reg;
4195           else
4196             return;
4197
4198           if (addr_reg->reg_type.bitfield.word
4199               && i.op[1].regs->reg_type.bitfield.dword)
4200             {
4201               if (flag_code != CODE_32BIT)
4202                 return;
4203               i.tm.opcode_modifier.opcodespace = SPACE_0F;
4204               i.tm.base_opcode = 0xb7;
4205             }
4206           else
4207             i.tm.base_opcode = 0x8b;
4208
4209           if (addr_reg->reg_type.bitfield.dword
4210               && i.op[1].regs->reg_type.bitfield.qword)
4211             i.tm.opcode_modifier.size = SIZE32;
4212
4213           i.op[0].regs = addr_reg;
4214           i.reg_operands = 2;
4215         }
4216
4217       i.mem_operands = 0;
4218       i.disp_operands = 0;
4219       i.prefix[ADDR_PREFIX] = 0;
4220       i.prefix[SEG_PREFIX] = 0;
4221       i.seg[0] = NULL;
4222     }
4223
4224   if (optimize_for_space
4225       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4226       && i.reg_operands == 1
4227       && i.imm_operands == 1
4228       && !i.types[1].bitfield.byte
4229       && i.op[0].imms->X_op == O_constant
4230       && fits_in_imm7 (i.op[0].imms->X_add_number)
4231       && (i.tm.base_opcode == 0xa8
4232           || (i.tm.base_opcode == 0xf6
4233               && i.tm.extension_opcode == 0x0)))
4234     {
4235       /* Optimize: -Os:
4236            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4237        */
4238       unsigned int base_regnum = i.op[1].regs->reg_num;
4239       if (flag_code == CODE_64BIT || base_regnum < 4)
4240         {
4241           i.types[1].bitfield.byte = 1;
4242           /* Ignore the suffix.  */
4243           i.suffix = 0;
4244           /* Convert to byte registers.  */
4245           if (i.types[1].bitfield.word)
4246             j = 16;
4247           else if (i.types[1].bitfield.dword)
4248             j = 32;
4249           else
4250             j = 48;
4251           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4252             j += 8;
4253           i.op[1].regs -= j;
4254         }
4255     }
4256   else if (flag_code == CODE_64BIT
4257            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4258            && ((i.types[1].bitfield.qword
4259                 && i.reg_operands == 1
4260                 && i.imm_operands == 1
4261                 && i.op[0].imms->X_op == O_constant
4262                 && ((i.tm.base_opcode == 0xb8
4263                      && i.tm.extension_opcode == None
4264                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4265                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4266                         && ((i.tm.base_opcode == 0x24
4267                              || i.tm.base_opcode == 0xa8)
4268                             || (i.tm.base_opcode == 0x80
4269                                 && i.tm.extension_opcode == 0x4)
4270                             || ((i.tm.base_opcode == 0xf6
4271                                  || (i.tm.base_opcode | 1) == 0xc7)
4272                                 && i.tm.extension_opcode == 0x0)))
4273                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4274                         && i.tm.base_opcode == 0x83
4275                         && i.tm.extension_opcode == 0x4)))
4276                || (i.types[0].bitfield.qword
4277                    && ((i.reg_operands == 2
4278                         && i.op[0].regs == i.op[1].regs
4279                         && (i.tm.base_opcode == 0x30
4280                             || i.tm.base_opcode == 0x28))
4281                        || (i.reg_operands == 1
4282                            && i.operands == 1
4283                            && i.tm.base_opcode == 0x30)))))
4284     {
4285       /* Optimize: -O:
4286            andq $imm31, %r64   -> andl $imm31, %r32
4287            andq $imm7, %r64    -> andl $imm7, %r32
4288            testq $imm31, %r64  -> testl $imm31, %r32
4289            xorq %r64, %r64     -> xorl %r32, %r32
4290            subq %r64, %r64     -> subl %r32, %r32
4291            movq $imm31, %r64   -> movl $imm31, %r32
4292            movq $imm32, %r64   -> movl $imm32, %r32
4293         */
4294       i.tm.opcode_modifier.norex64 = 1;
4295       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4296         {
4297           /* Handle
4298                movq $imm31, %r64   -> movl $imm31, %r32
4299                movq $imm32, %r64   -> movl $imm32, %r32
4300            */
4301           i.tm.operand_types[0].bitfield.imm32 = 1;
4302           i.tm.operand_types[0].bitfield.imm32s = 0;
4303           i.tm.operand_types[0].bitfield.imm64 = 0;
4304           i.types[0].bitfield.imm32 = 1;
4305           i.types[0].bitfield.imm32s = 0;
4306           i.types[0].bitfield.imm64 = 0;
4307           i.types[1].bitfield.dword = 1;
4308           i.types[1].bitfield.qword = 0;
4309           if ((i.tm.base_opcode | 1) == 0xc7)
4310             {
4311               /* Handle
4312                    movq $imm31, %r64   -> movl $imm31, %r32
4313                */
4314               i.tm.base_opcode = 0xb8;
4315               i.tm.extension_opcode = None;
4316               i.tm.opcode_modifier.w = 0;
4317               i.tm.opcode_modifier.modrm = 0;
4318             }
4319         }
4320     }
4321   else if (optimize > 1
4322            && !optimize_for_space
4323            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4324            && i.reg_operands == 2
4325            && i.op[0].regs == i.op[1].regs
4326            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4327                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4328            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4329     {
4330       /* Optimize: -O2:
4331            andb %rN, %rN  -> testb %rN, %rN
4332            andw %rN, %rN  -> testw %rN, %rN
4333            andq %rN, %rN  -> testq %rN, %rN
4334            orb %rN, %rN   -> testb %rN, %rN
4335            orw %rN, %rN   -> testw %rN, %rN
4336            orq %rN, %rN   -> testq %rN, %rN
4337
4338            and outside of 64-bit mode
4339
4340            andl %rN, %rN  -> testl %rN, %rN
4341            orl %rN, %rN   -> testl %rN, %rN
4342        */
4343       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4344     }
4345   else if (i.reg_operands == 3
4346            && i.op[0].regs == i.op[1].regs
4347            && !i.types[2].bitfield.xmmword
4348            && (i.tm.opcode_modifier.vex
4349                || ((!i.mask.reg || i.mask.zeroing)
4350                    && i.rounding.type == rc_none
4351                    && is_evex_encoding (&i.tm)
4352                    && (i.vec_encoding != vex_encoding_evex
4353                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4354                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4355                        || (i.tm.operand_types[2].bitfield.zmmword
4356                            && i.types[2].bitfield.ymmword))))
4357            && ((i.tm.base_opcode == 0x55
4358                 || i.tm.base_opcode == 0x57
4359                 || i.tm.base_opcode == 0xdf
4360                 || i.tm.base_opcode == 0xef
4361                 || i.tm.base_opcode == 0xf8
4362                 || i.tm.base_opcode == 0xf9
4363                 || i.tm.base_opcode == 0xfa
4364                 || i.tm.base_opcode == 0xfb
4365                 || i.tm.base_opcode == 0x42
4366                 || i.tm.base_opcode == 0x47)
4367                && i.tm.extension_opcode == None))
4368     {
4369       /* Optimize: -O1:
4370            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4371            vpsubq and vpsubw:
4372              EVEX VOP %zmmM, %zmmM, %zmmN
4373                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4374                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4375              EVEX VOP %ymmM, %ymmM, %ymmN
4376                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4377                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4378              VEX VOP %ymmM, %ymmM, %ymmN
4379                -> VEX VOP %xmmM, %xmmM, %xmmN
4380            VOP, one of vpandn and vpxor:
4381              VEX VOP %ymmM, %ymmM, %ymmN
4382                -> VEX VOP %xmmM, %xmmM, %xmmN
4383            VOP, one of vpandnd and vpandnq:
4384              EVEX VOP %zmmM, %zmmM, %zmmN
4385                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4386                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4387              EVEX VOP %ymmM, %ymmM, %ymmN
4388                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4389                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4390            VOP, one of vpxord and vpxorq:
4391              EVEX VOP %zmmM, %zmmM, %zmmN
4392                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4393                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4394              EVEX VOP %ymmM, %ymmM, %ymmN
4395                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4396                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4397            VOP, one of kxord and kxorq:
4398              VEX VOP %kM, %kM, %kN
4399                -> VEX kxorw %kM, %kM, %kN
4400            VOP, one of kandnd and kandnq:
4401              VEX VOP %kM, %kM, %kN
4402                -> VEX kandnw %kM, %kM, %kN
4403        */
4404       if (is_evex_encoding (&i.tm))
4405         {
4406           if (i.vec_encoding != vex_encoding_evex)
4407             {
4408               i.tm.opcode_modifier.vex = VEX128;
4409               i.tm.opcode_modifier.vexw = VEXW0;
4410               i.tm.opcode_modifier.evex = 0;
4411             }
4412           else if (optimize > 1)
4413             i.tm.opcode_modifier.evex = EVEX128;
4414           else
4415             return;
4416         }
4417       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4418         {
4419           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4420           i.tm.opcode_modifier.vexw = VEXW0;
4421         }
4422       else
4423         i.tm.opcode_modifier.vex = VEX128;
4424
4425       if (i.tm.opcode_modifier.vex)
4426         for (j = 0; j < 3; j++)
4427           {
4428             i.types[j].bitfield.xmmword = 1;
4429             i.types[j].bitfield.ymmword = 0;
4430           }
4431     }
4432   else if (i.vec_encoding != vex_encoding_evex
4433            && !i.types[0].bitfield.zmmword
4434            && !i.types[1].bitfield.zmmword
4435            && !i.mask.reg
4436            && !i.broadcast.type
4437            && is_evex_encoding (&i.tm)
4438            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4439                || (i.tm.base_opcode & ~4) == 0xdb
4440                || (i.tm.base_opcode & ~4) == 0xeb)
4441            && i.tm.extension_opcode == None)
4442     {
4443       /* Optimize: -O1:
4444            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4445            vmovdqu32 and vmovdqu64:
4446              EVEX VOP %xmmM, %xmmN
4447                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4448              EVEX VOP %ymmM, %ymmN
4449                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4450              EVEX VOP %xmmM, mem
4451                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4452              EVEX VOP %ymmM, mem
4453                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4454              EVEX VOP mem, %xmmN
4455                -> VEX mvmovdqa|vmovdquem, %xmmN (N < 16)
4456              EVEX VOP mem, %ymmN
4457                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4458            VOP, one of vpand, vpandn, vpor, vpxor:
4459              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4460                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4461              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4462                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4463              EVEX VOP{d,q} mem, %xmmM, %xmmN
4464                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4465              EVEX VOP{d,q} mem, %ymmM, %ymmN
4466                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4467        */
4468       for (j = 0; j < i.operands; j++)
4469         if (operand_type_check (i.types[j], disp)
4470             && i.op[j].disps->X_op == O_constant)
4471           {
4472             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4473                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4474                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4475             int evex_disp8, vex_disp8;
4476             unsigned int memshift = i.memshift;
4477             offsetT n = i.op[j].disps->X_add_number;
4478
4479             evex_disp8 = fits_in_disp8 (n);
4480             i.memshift = 0;
4481             vex_disp8 = fits_in_disp8 (n);
4482             if (evex_disp8 != vex_disp8)
4483               {
4484                 i.memshift = memshift;
4485                 return;
4486               }
4487
4488             i.types[j].bitfield.disp8 = vex_disp8;
4489             break;
4490           }
4491       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4492           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4493         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4494       i.tm.opcode_modifier.vex
4495         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4496       i.tm.opcode_modifier.vexw = VEXW0;
4497       /* VPAND, VPOR, and VPXOR are commutative.  */
4498       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4499         i.tm.opcode_modifier.commutative = 1;
4500       i.tm.opcode_modifier.evex = 0;
4501       i.tm.opcode_modifier.masking = 0;
4502       i.tm.opcode_modifier.broadcast = 0;
4503       i.tm.opcode_modifier.disp8memshift = 0;
4504       i.memshift = 0;
4505       if (j < i.operands)
4506         i.types[j].bitfield.disp8
4507           = fits_in_disp8 (i.op[j].disps->X_add_number);
4508     }
4509 }
4510
4511 /* Return non-zero for load instruction.  */
4512
4513 static int
4514 load_insn_p (void)
4515 {
4516   unsigned int dest;
4517   int any_vex_p = is_any_vex_encoding (&i.tm);
4518   unsigned int base_opcode = i.tm.base_opcode | 1;
4519
4520   if (!any_vex_p)
4521     {
4522       /* Anysize insns: lea, invlpg, clflush, prefetchnta, prefetcht0,
4523          prefetcht1, prefetcht2, prefetchtw, bndmk, bndcl, bndcu, bndcn,
4524          bndstx, bndldx, prefetchwt1, clflushopt, clwb, cldemote.  */
4525       if (i.tm.opcode_modifier.anysize)
4526         return 0;
4527
4528       /* pop.   */
4529       if (strcmp (i.tm.name, "pop") == 0)
4530         return 1;
4531     }
4532
4533   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4534     {
4535       /* popf, popa.   */
4536       if (i.tm.base_opcode == 0x9d
4537           || i.tm.base_opcode == 0x61)
4538         return 1;
4539
4540       /* movs, cmps, lods, scas.  */
4541       if ((i.tm.base_opcode | 0xb) == 0xaf)
4542         return 1;
4543
4544       /* outs, xlatb.  */
4545       if (base_opcode == 0x6f
4546           || i.tm.base_opcode == 0xd7)
4547         return 1;
4548       /* NB: For AMD-specific insns with implicit memory operands,
4549          they're intentionally not covered.  */
4550     }
4551
4552   /* No memory operand.  */
4553   if (!i.mem_operands)
4554     return 0;
4555
4556   if (any_vex_p)
4557     {
4558       /* vldmxcsr.  */
4559       if (i.tm.base_opcode == 0xae
4560           && i.tm.opcode_modifier.vex
4561           && i.tm.opcode_modifier.opcodespace == SPACE_0F
4562           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4563           && i.tm.extension_opcode == 2)
4564         return 1;
4565     }
4566   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4567     {
4568       /* test, not, neg, mul, imul, div, idiv.  */
4569       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4570           && i.tm.extension_opcode != 1)
4571         return 1;
4572
4573       /* inc, dec.  */
4574       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4575         return 1;
4576
4577       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4578       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4579         return 1;
4580
4581       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4582       if ((base_opcode == 0xc1
4583            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4584           && i.tm.extension_opcode != 6)
4585         return 1;
4586
4587       /* Check for x87 instructions.  */
4588       if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4589         {
4590           /* Skip fst, fstp, fstenv, fstcw.  */
4591           if (i.tm.base_opcode == 0xd9
4592               && (i.tm.extension_opcode == 2
4593                   || i.tm.extension_opcode == 3
4594                   || i.tm.extension_opcode == 6
4595                   || i.tm.extension_opcode == 7))
4596             return 0;
4597
4598           /* Skip fisttp, fist, fistp, fstp.  */
4599           if (i.tm.base_opcode == 0xdb
4600               && (i.tm.extension_opcode == 1
4601                   || i.tm.extension_opcode == 2
4602                   || i.tm.extension_opcode == 3
4603                   || i.tm.extension_opcode == 7))
4604             return 0;
4605
4606           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4607           if (i.tm.base_opcode == 0xdd
4608               && (i.tm.extension_opcode == 1
4609                   || i.tm.extension_opcode == 2
4610                   || i.tm.extension_opcode == 3
4611                   || i.tm.extension_opcode == 6
4612                   || i.tm.extension_opcode == 7))
4613             return 0;
4614
4615           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4616           if (i.tm.base_opcode == 0xdf
4617               && (i.tm.extension_opcode == 1
4618                   || i.tm.extension_opcode == 2
4619                   || i.tm.extension_opcode == 3
4620                   || i.tm.extension_opcode == 6
4621                   || i.tm.extension_opcode == 7))
4622             return 0;
4623
4624           return 1;
4625         }
4626     }
4627   else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4628     {
4629       /* bt, bts, btr, btc.  */
4630       if (i.tm.base_opcode == 0xba
4631           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4632         return 1;
4633
4634       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
4635       if (i.tm.base_opcode == 0xc7
4636           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4637           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4638               || i.tm.extension_opcode == 6))
4639         return 1;
4640
4641       /* fxrstor, ldmxcsr, xrstor.  */
4642       if (i.tm.base_opcode == 0xae
4643           && (i.tm.extension_opcode == 1
4644               || i.tm.extension_opcode == 2
4645               || i.tm.extension_opcode == 5))
4646         return 1;
4647
4648       /* lgdt, lidt, lmsw.  */
4649       if (i.tm.base_opcode == 0x01
4650           && (i.tm.extension_opcode == 2
4651               || i.tm.extension_opcode == 3
4652               || i.tm.extension_opcode == 6))
4653         return 1;
4654     }
4655
4656   dest = i.operands - 1;
4657
4658   /* Check fake imm8 operand and 3 source operands.  */
4659   if ((i.tm.opcode_modifier.immext
4660        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4661       && i.types[dest].bitfield.imm8)
4662     dest--;
4663
4664   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
4665   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4666       && (base_opcode == 0x1
4667           || base_opcode == 0x9
4668           || base_opcode == 0x11
4669           || base_opcode == 0x19
4670           || base_opcode == 0x21
4671           || base_opcode == 0x29
4672           || base_opcode == 0x31
4673           || base_opcode == 0x39
4674           || (base_opcode | 2) == 0x87))
4675     return 1;
4676
4677   /* xadd.  */
4678   if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4679       && base_opcode == 0xc1)
4680     return 1;
4681
4682   /* Check for load instruction.  */
4683   return (i.types[dest].bitfield.class != ClassNone
4684           || i.types[dest].bitfield.instance == Accum);
4685 }
4686
4687 /* Output lfence, 0xfaee8, after instruction.  */
4688
4689 static void
4690 insert_lfence_after (void)
4691 {
4692   if (lfence_after_load && load_insn_p ())
4693     {
4694       /* There are also two REP string instructions that require
4695          special treatment. Specifically, the compare string (CMPS)
4696          and scan string (SCAS) instructions set EFLAGS in a manner
4697          that depends on the data being compared/scanned. When used
4698          with a REP prefix, the number of iterations may therefore
4699          vary depending on this data. If the data is a program secret
4700          chosen by the adversary using an LVI method,
4701          then this data-dependent behavior may leak some aspect
4702          of the secret.  */
4703       if (((i.tm.base_opcode | 0x1) == 0xa7
4704            || (i.tm.base_opcode | 0x1) == 0xaf)
4705           && i.prefix[REP_PREFIX])
4706         {
4707             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4708                      i.tm.name);
4709         }
4710       char *p = frag_more (3);
4711       *p++ = 0xf;
4712       *p++ = 0xae;
4713       *p = 0xe8;
4714     }
4715 }
4716
4717 /* Output lfence, 0xfaee8, before instruction.  */
4718
4719 static void
4720 insert_lfence_before (void)
4721 {
4722   char *p;
4723
4724   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4725     return;
4726
4727   if (i.tm.base_opcode == 0xff
4728       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4729     {
4730       /* Insert lfence before indirect branch if needed.  */
4731
4732       if (lfence_before_indirect_branch == lfence_branch_none)
4733         return;
4734
4735       if (i.operands != 1)
4736         abort ();
4737
4738       if (i.reg_operands == 1)
4739         {
4740           /* Indirect branch via register.  Don't insert lfence with
4741              -mlfence-after-load=yes.  */
4742           if (lfence_after_load
4743               || lfence_before_indirect_branch == lfence_branch_memory)
4744             return;
4745         }
4746       else if (i.mem_operands == 1
4747                && lfence_before_indirect_branch != lfence_branch_register)
4748         {
4749           as_warn (_("indirect `%s` with memory operand should be avoided"),
4750                    i.tm.name);
4751           return;
4752         }
4753       else
4754         return;
4755
4756       if (last_insn.kind != last_insn_other
4757           && last_insn.seg == now_seg)
4758         {
4759           as_warn_where (last_insn.file, last_insn.line,
4760                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4761                          last_insn.name, i.tm.name);
4762           return;
4763         }
4764
4765       p = frag_more (3);
4766       *p++ = 0xf;
4767       *p++ = 0xae;
4768       *p = 0xe8;
4769       return;
4770     }
4771
4772   /* Output or/not/shl and lfence before near ret.  */
4773   if (lfence_before_ret != lfence_before_ret_none
4774       && (i.tm.base_opcode == 0xc2
4775           || i.tm.base_opcode == 0xc3))
4776     {
4777       if (last_insn.kind != last_insn_other
4778           && last_insn.seg == now_seg)
4779         {
4780           as_warn_where (last_insn.file, last_insn.line,
4781                          _("`%s` skips -mlfence-before-ret on `%s`"),
4782                          last_insn.name, i.tm.name);
4783           return;
4784         }
4785
4786       /* Near ret ingore operand size override under CPU64.  */
4787       char prefix = flag_code == CODE_64BIT
4788                     ? 0x48
4789                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4790
4791       if (lfence_before_ret == lfence_before_ret_not)
4792         {
4793           /* not: 0xf71424, may add prefix
4794              for operand size override or 64-bit code.  */
4795           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4796           if (prefix)
4797             *p++ = prefix;
4798           *p++ = 0xf7;
4799           *p++ = 0x14;
4800           *p++ = 0x24;
4801           if (prefix)
4802             *p++ = prefix;
4803           *p++ = 0xf7;
4804           *p++ = 0x14;
4805           *p++ = 0x24;
4806         }
4807       else
4808         {
4809           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4810           if (prefix)
4811             *p++ = prefix;
4812           if (lfence_before_ret == lfence_before_ret_or)
4813             {
4814               /* or: 0x830c2400, may add prefix
4815                  for operand size override or 64-bit code.  */
4816               *p++ = 0x83;
4817               *p++ = 0x0c;
4818             }
4819           else
4820             {
4821               /* shl: 0xc1242400, may add prefix
4822                  for operand size override or 64-bit code.  */
4823               *p++ = 0xc1;
4824               *p++ = 0x24;
4825             }
4826
4827           *p++ = 0x24;
4828           *p++ = 0x0;
4829         }
4830
4831       *p++ = 0xf;
4832       *p++ = 0xae;
4833       *p = 0xe8;
4834     }
4835 }
4836
4837 /* This is the guts of the machine-dependent assembler.  LINE points to a
4838    machine dependent instruction.  This function is supposed to emit
4839    the frags/bytes it assembles to.  */
4840
4841 void
4842 md_assemble (char *line)
4843 {
4844   unsigned int j;
4845   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4846   const insn_template *t;
4847
4848   /* Initialize globals.  */
4849   memset (&i, '\0', sizeof (i));
4850   i.rounding.type = rc_none;
4851   for (j = 0; j < MAX_OPERANDS; j++)
4852     i.reloc[j] = NO_RELOC;
4853   memset (disp_expressions, '\0', sizeof (disp_expressions));
4854   memset (im_expressions, '\0', sizeof (im_expressions));
4855   save_stack_p = save_stack;
4856
4857   /* First parse an instruction mnemonic & call i386_operand for the operands.
4858      We assume that the scrubber has arranged it so that line[0] is the valid
4859      start of a (possibly prefixed) mnemonic.  */
4860
4861   line = parse_insn (line, mnemonic);
4862   if (line == NULL)
4863     return;
4864   mnem_suffix = i.suffix;
4865
4866   line = parse_operands (line, mnemonic);
4867   this_operand = -1;
4868   xfree (i.memop1_string);
4869   i.memop1_string = NULL;
4870   if (line == NULL)
4871     return;
4872
4873   /* Now we've parsed the mnemonic into a set of templates, and have the
4874      operands at hand.  */
4875
4876   /* All Intel opcodes have reversed operands except for "bound", "enter",
4877      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4878      "rmpadjust", and "rmpupdate".  We also don't reverse intersegment "jmp"
4879      and "call" instructions with 2 immediate operands so that the immediate
4880      segment precedes the offset consistently in Intel and AT&T modes.  */
4881   if (intel_syntax
4882       && i.operands > 1
4883       && (strcmp (mnemonic, "bound") != 0)
4884       && (strncmp (mnemonic, "invlpg", 6) != 0)
4885       && !startswith (mnemonic, "monitor")
4886       && !startswith (mnemonic, "mwait")
4887       && (strcmp (mnemonic, "pvalidate") != 0)
4888       && !startswith (mnemonic, "rmp")
4889       && (strcmp (mnemonic, "tpause") != 0)
4890       && (strcmp (mnemonic, "umwait") != 0)
4891       && !(operand_type_check (i.types[0], imm)
4892            && operand_type_check (i.types[1], imm)))
4893     swap_operands ();
4894
4895   /* The order of the immediates should be reversed
4896      for 2 immediates extrq and insertq instructions */
4897   if (i.imm_operands == 2
4898       && (strcmp (mnemonic, "extrq") == 0
4899           || strcmp (mnemonic, "insertq") == 0))
4900       swap_2_operands (0, 1);
4901
4902   if (i.imm_operands)
4903     optimize_imm ();
4904
4905   if (i.disp_operands && !want_disp32 (current_templates->start))
4906     {
4907       for (j = 0; j < i.operands; ++j)
4908         {
4909           const expressionS *exp = i.op[j].disps;
4910
4911           if (!operand_type_check (i.types[j], disp))
4912             continue;
4913
4914           if (exp->X_op != O_constant)
4915             continue;
4916
4917           /* Since displacement is signed extended to 64bit, don't allow
4918              disp32 and turn off disp32s if they are out of range.  */
4919           i.types[j].bitfield.disp32 = 0;
4920           if (fits_in_signed_long (exp->X_add_number))
4921             continue;
4922
4923           i.types[j].bitfield.disp32s = 0;
4924           if (i.types[j].bitfield.baseindex)
4925             {
4926               as_bad (_("0x%" BFD_VMA_FMT "x out of range of signed 32bit displacement"),
4927                       exp->X_add_number);
4928               return;
4929             }
4930         }
4931     }
4932
4933   /* Don't optimize displacement for movabs since it only takes 64bit
4934      displacement.  */
4935   if (i.disp_operands
4936       && i.disp_encoding != disp_encoding_32bit
4937       && (flag_code != CODE_64BIT
4938           || strcmp (mnemonic, "movabs") != 0))
4939     optimize_disp ();
4940
4941   /* Next, we find a template that matches the given insn,
4942      making sure the overlap of the given operands types is consistent
4943      with the template operand types.  */
4944
4945   if (!(t = match_template (mnem_suffix)))
4946     return;
4947
4948   if (sse_check != check_none
4949       && !i.tm.opcode_modifier.noavx
4950       && !i.tm.cpu_flags.bitfield.cpuavx
4951       && !i.tm.cpu_flags.bitfield.cpuavx512f
4952       && (i.tm.cpu_flags.bitfield.cpusse
4953           || i.tm.cpu_flags.bitfield.cpusse2
4954           || i.tm.cpu_flags.bitfield.cpusse3
4955           || i.tm.cpu_flags.bitfield.cpussse3
4956           || i.tm.cpu_flags.bitfield.cpusse4_1
4957           || i.tm.cpu_flags.bitfield.cpusse4_2
4958           || i.tm.cpu_flags.bitfield.cpupclmul
4959           || i.tm.cpu_flags.bitfield.cpuaes
4960           || i.tm.cpu_flags.bitfield.cpusha
4961           || i.tm.cpu_flags.bitfield.cpugfni))
4962     {
4963       (sse_check == check_warning
4964        ? as_warn
4965        : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4966     }
4967
4968   if (i.tm.opcode_modifier.fwait)
4969     if (!add_prefix (FWAIT_OPCODE))
4970       return;
4971
4972   /* Check if REP prefix is OK.  */
4973   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
4974     {
4975       as_bad (_("invalid instruction `%s' after `%s'"),
4976                 i.tm.name, i.rep_prefix);
4977       return;
4978     }
4979
4980   /* Check for lock without a lockable instruction.  Destination operand
4981      must be memory unless it is xchg (0x86).  */
4982   if (i.prefix[LOCK_PREFIX]
4983       && (i.tm.opcode_modifier.prefixok < PrefixLock
4984           || i.mem_operands == 0
4985           || (i.tm.base_opcode != 0x86
4986               && !(i.flags[i.operands - 1] & Operand_Mem))))
4987     {
4988       as_bad (_("expecting lockable instruction after `lock'"));
4989       return;
4990     }
4991
4992   /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
4993   if (i.prefix[DATA_PREFIX]
4994       && (is_any_vex_encoding (&i.tm)
4995           || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
4996           || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
4997     {
4998       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
4999       return;
5000     }
5001
5002   /* Check if HLE prefix is OK.  */
5003   if (i.hle_prefix && !check_hle ())
5004     return;
5005
5006   /* Check BND prefix.  */
5007   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5008     as_bad (_("expecting valid branch instruction after `bnd'"));
5009
5010   /* Check NOTRACK prefix.  */
5011   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5012     as_bad (_("expecting indirect branch instruction after `notrack'"));
5013
5014   if (i.tm.cpu_flags.bitfield.cpumpx)
5015     {
5016       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5017         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5018       else if (flag_code != CODE_16BIT
5019                ? i.prefix[ADDR_PREFIX]
5020                : i.mem_operands && !i.prefix[ADDR_PREFIX])
5021         as_bad (_("16-bit address isn't allowed in MPX instructions"));
5022     }
5023
5024   /* Insert BND prefix.  */
5025   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5026     {
5027       if (!i.prefix[BND_PREFIX])
5028         add_prefix (BND_PREFIX_OPCODE);
5029       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5030         {
5031           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5032           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5033         }
5034     }
5035
5036   /* Check string instruction segment overrides.  */
5037   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5038     {
5039       gas_assert (i.mem_operands);
5040       if (!check_string ())
5041         return;
5042       i.disp_operands = 0;
5043     }
5044
5045   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5046     optimize_encoding ();
5047
5048   if (!process_suffix ())
5049     return;
5050
5051   /* Update operand types and check extended states.  */
5052   for (j = 0; j < i.operands; j++)
5053     {
5054       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5055       switch (i.tm.operand_types[j].bitfield.class)
5056         {
5057         default:
5058           break;
5059         case RegMMX:
5060           i.xstate |= xstate_mmx;
5061           break;
5062         case RegMask:
5063           i.xstate |= xstate_mask;
5064           break;
5065         case RegSIMD:
5066           if (i.tm.operand_types[j].bitfield.tmmword)
5067             i.xstate |= xstate_tmm;
5068           else if (i.tm.operand_types[j].bitfield.zmmword)
5069             i.xstate |= xstate_zmm;
5070           else if (i.tm.operand_types[j].bitfield.ymmword)
5071             i.xstate |= xstate_ymm;
5072           else if (i.tm.operand_types[j].bitfield.xmmword)
5073             i.xstate |= xstate_xmm;
5074           break;
5075         }
5076     }
5077
5078   /* Make still unresolved immediate matches conform to size of immediate
5079      given in i.suffix.  */
5080   if (!finalize_imm ())
5081     return;
5082
5083   if (i.types[0].bitfield.imm1)
5084     i.imm_operands = 0; /* kludge for shift insns.  */
5085
5086   /* We only need to check those implicit registers for instructions
5087      with 3 operands or less.  */
5088   if (i.operands <= 3)
5089     for (j = 0; j < i.operands; j++)
5090       if (i.types[j].bitfield.instance != InstanceNone
5091           && !i.types[j].bitfield.xmmword)
5092         i.reg_operands--;
5093
5094   /* For insns with operands there are more diddles to do to the opcode.  */
5095   if (i.operands)
5096     {
5097       if (!process_operands ())
5098         return;
5099     }
5100   else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
5101     {
5102       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
5103       as_warn (_("translating to `%sp'"), i.tm.name);
5104     }
5105
5106   if (is_any_vex_encoding (&i.tm))
5107     {
5108       if (!cpu_arch_flags.bitfield.cpui286)
5109         {
5110           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5111                   i.tm.name);
5112           return;
5113         }
5114
5115       /* Check for explicit REX prefix.  */
5116       if (i.prefix[REX_PREFIX] || i.rex_encoding)
5117         {
5118           as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5119           return;
5120         }
5121
5122       if (i.tm.opcode_modifier.vex)
5123         build_vex_prefix (t);
5124       else
5125         build_evex_prefix ();
5126
5127       /* The individual REX.RXBW bits got consumed.  */
5128       i.rex &= REX_OPCODE;
5129     }
5130
5131   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
5132      instructions may define INT_OPCODE as well, so avoid this corner
5133      case for those instructions that use MODRM.  */
5134   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5135       && i.tm.base_opcode == INT_OPCODE
5136       && !i.tm.opcode_modifier.modrm
5137       && i.op[0].imms->X_add_number == 3)
5138     {
5139       i.tm.base_opcode = INT3_OPCODE;
5140       i.imm_operands = 0;
5141     }
5142
5143   if ((i.tm.opcode_modifier.jump == JUMP
5144        || i.tm.opcode_modifier.jump == JUMP_BYTE
5145        || i.tm.opcode_modifier.jump == JUMP_DWORD)
5146       && i.op[0].disps->X_op == O_constant)
5147     {
5148       /* Convert "jmp constant" (and "call constant") to a jump (call) to
5149          the absolute address given by the constant.  Since ix86 jumps and
5150          calls are pc relative, we need to generate a reloc.  */
5151       i.op[0].disps->X_add_symbol = &abs_symbol;
5152       i.op[0].disps->X_op = O_symbol;
5153     }
5154
5155   /* For 8 bit registers we need an empty rex prefix.  Also if the
5156      instruction already has a prefix, we need to convert old
5157      registers to new ones.  */
5158
5159   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5160        && (i.op[0].regs->reg_flags & RegRex64) != 0)
5161       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5162           && (i.op[1].regs->reg_flags & RegRex64) != 0)
5163       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5164            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5165           && i.rex != 0))
5166     {
5167       int x;
5168
5169       i.rex |= REX_OPCODE;
5170       for (x = 0; x < 2; x++)
5171         {
5172           /* Look for 8 bit operand that uses old registers.  */
5173           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5174               && (i.op[x].regs->reg_flags & RegRex64) == 0)
5175             {
5176               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5177               /* In case it is "hi" register, give up.  */
5178               if (i.op[x].regs->reg_num > 3)
5179                 as_bad (_("can't encode register '%s%s' in an "
5180                           "instruction requiring REX prefix."),
5181                         register_prefix, i.op[x].regs->reg_name);
5182
5183               /* Otherwise it is equivalent to the extended register.
5184                  Since the encoding doesn't change this is merely
5185                  cosmetic cleanup for debug output.  */
5186
5187               i.op[x].regs = i.op[x].regs + 8;
5188             }
5189         }
5190     }
5191
5192   if (i.rex == 0 && i.rex_encoding)
5193     {
5194       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
5195          that uses legacy register.  If it is "hi" register, don't add
5196          the REX_OPCODE byte.  */
5197       int x;
5198       for (x = 0; x < 2; x++)
5199         if (i.types[x].bitfield.class == Reg
5200             && i.types[x].bitfield.byte
5201             && (i.op[x].regs->reg_flags & RegRex64) == 0
5202             && i.op[x].regs->reg_num > 3)
5203           {
5204             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5205             i.rex_encoding = false;
5206             break;
5207           }
5208
5209       if (i.rex_encoding)
5210         i.rex = REX_OPCODE;
5211     }
5212
5213   if (i.rex != 0)
5214     add_prefix (REX_OPCODE | i.rex);
5215
5216   insert_lfence_before ();
5217
5218   /* We are ready to output the insn.  */
5219   output_insn ();
5220
5221   insert_lfence_after ();
5222
5223   last_insn.seg = now_seg;
5224
5225   if (i.tm.opcode_modifier.isprefix)
5226     {
5227       last_insn.kind = last_insn_prefix;
5228       last_insn.name = i.tm.name;
5229       last_insn.file = as_where (&last_insn.line);
5230     }
5231   else
5232     last_insn.kind = last_insn_other;
5233 }
5234
5235 static char *
5236 parse_insn (char *line, char *mnemonic)
5237 {
5238   char *l = line;
5239   char *token_start = l;
5240   char *mnem_p;
5241   int supported;
5242   const insn_template *t;
5243   char *dot_p = NULL;
5244
5245   while (1)
5246     {
5247       mnem_p = mnemonic;
5248       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5249         {
5250           if (*mnem_p == '.')
5251             dot_p = mnem_p;
5252           mnem_p++;
5253           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5254             {
5255               as_bad (_("no such instruction: `%s'"), token_start);
5256               return NULL;
5257             }
5258           l++;
5259         }
5260       if (!is_space_char (*l)
5261           && *l != END_OF_INSN
5262           && (intel_syntax
5263               || (*l != PREFIX_SEPARATOR
5264                   && *l != ',')))
5265         {
5266           as_bad (_("invalid character %s in mnemonic"),
5267                   output_invalid (*l));
5268           return NULL;
5269         }
5270       if (token_start == l)
5271         {
5272           if (!intel_syntax && *l == PREFIX_SEPARATOR)
5273             as_bad (_("expecting prefix; got nothing"));
5274           else
5275             as_bad (_("expecting mnemonic; got nothing"));
5276           return NULL;
5277         }
5278
5279       /* Look up instruction (or prefix) via hash table.  */
5280       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5281
5282       if (*l != END_OF_INSN
5283           && (!is_space_char (*l) || l[1] != END_OF_INSN)
5284           && current_templates
5285           && current_templates->start->opcode_modifier.isprefix)
5286         {
5287           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5288             {
5289               as_bad ((flag_code != CODE_64BIT
5290                        ? _("`%s' is only supported in 64-bit mode")
5291                        : _("`%s' is not supported in 64-bit mode")),
5292                       current_templates->start->name);
5293               return NULL;
5294             }
5295           /* If we are in 16-bit mode, do not allow addr16 or data16.
5296              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
5297           if ((current_templates->start->opcode_modifier.size == SIZE16
5298                || current_templates->start->opcode_modifier.size == SIZE32)
5299               && flag_code != CODE_64BIT
5300               && ((current_templates->start->opcode_modifier.size == SIZE32)
5301                   ^ (flag_code == CODE_16BIT)))
5302             {
5303               as_bad (_("redundant %s prefix"),
5304                       current_templates->start->name);
5305               return NULL;
5306             }
5307
5308           if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5309             {
5310               /* Handle pseudo prefixes.  */
5311               switch (current_templates->start->extension_opcode)
5312                 {
5313                 case Prefix_Disp8:
5314                   /* {disp8} */
5315                   i.disp_encoding = disp_encoding_8bit;
5316                   break;
5317                 case Prefix_Disp16:
5318                   /* {disp16} */
5319                   i.disp_encoding = disp_encoding_16bit;
5320                   break;
5321                 case Prefix_Disp32:
5322                   /* {disp32} */
5323                   i.disp_encoding = disp_encoding_32bit;
5324                   break;
5325                 case Prefix_Load:
5326                   /* {load} */
5327                   i.dir_encoding = dir_encoding_load;
5328                   break;
5329                 case Prefix_Store:
5330                   /* {store} */
5331                   i.dir_encoding = dir_encoding_store;
5332                   break;
5333                 case Prefix_VEX:
5334                   /* {vex} */
5335                   i.vec_encoding = vex_encoding_vex;
5336                   break;
5337                 case Prefix_VEX3:
5338                   /* {vex3} */
5339                   i.vec_encoding = vex_encoding_vex3;
5340                   break;
5341                 case Prefix_EVEX:
5342                   /* {evex} */
5343                   i.vec_encoding = vex_encoding_evex;
5344                   break;
5345                 case Prefix_REX:
5346                   /* {rex} */
5347                   i.rex_encoding = true;
5348                   break;
5349                 case Prefix_NoOptimize:
5350                   /* {nooptimize} */
5351                   i.no_optimize = true;
5352                   break;
5353                 default:
5354                   abort ();
5355                 }
5356             }
5357           else
5358             {
5359               /* Add prefix, checking for repeated prefixes.  */
5360               switch (add_prefix (current_templates->start->base_opcode))
5361                 {
5362                 case PREFIX_EXIST:
5363                   return NULL;
5364                 case PREFIX_DS:
5365                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
5366                     i.notrack_prefix = current_templates->start->name;
5367                   break;
5368                 case PREFIX_REP:
5369                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
5370                     i.hle_prefix = current_templates->start->name;
5371                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5372                     i.bnd_prefix = current_templates->start->name;
5373                   else
5374                     i.rep_prefix = current_templates->start->name;
5375                   break;
5376                 default:
5377                   break;
5378                 }
5379             }
5380           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
5381           token_start = ++l;
5382         }
5383       else
5384         break;
5385     }
5386
5387   if (!current_templates)
5388     {
5389       /* Deprecated functionality (new code should use pseudo-prefixes instead):
5390          Check if we should swap operand or force 32bit displacement in
5391          encoding.  */
5392       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5393         i.dir_encoding = dir_encoding_swap;
5394       else if (mnem_p - 3 == dot_p
5395                && dot_p[1] == 'd'
5396                && dot_p[2] == '8')
5397         i.disp_encoding = disp_encoding_8bit;
5398       else if (mnem_p - 4 == dot_p
5399                && dot_p[1] == 'd'
5400                && dot_p[2] == '3'
5401                && dot_p[3] == '2')
5402         i.disp_encoding = disp_encoding_32bit;
5403       else
5404         goto check_suffix;
5405       mnem_p = dot_p;
5406       *dot_p = '\0';
5407       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5408     }
5409
5410   if (!current_templates)
5411     {
5412     check_suffix:
5413       if (mnem_p > mnemonic)
5414         {
5415           /* See if we can get a match by trimming off a suffix.  */
5416           switch (mnem_p[-1])
5417             {
5418             case WORD_MNEM_SUFFIX:
5419               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5420                 i.suffix = SHORT_MNEM_SUFFIX;
5421               else
5422                 /* Fall through.  */
5423               case BYTE_MNEM_SUFFIX:
5424               case QWORD_MNEM_SUFFIX:
5425                 i.suffix = mnem_p[-1];
5426               mnem_p[-1] = '\0';
5427               current_templates
5428                 = (const templates *) str_hash_find (op_hash, mnemonic);
5429               break;
5430             case SHORT_MNEM_SUFFIX:
5431             case LONG_MNEM_SUFFIX:
5432               if (!intel_syntax)
5433                 {
5434                   i.suffix = mnem_p[-1];
5435                   mnem_p[-1] = '\0';
5436                   current_templates
5437                     = (const templates *) str_hash_find (op_hash, mnemonic);
5438                 }
5439               break;
5440
5441               /* Intel Syntax.  */
5442             case 'd':
5443               if (intel_syntax)
5444                 {
5445                   if (intel_float_operand (mnemonic) == 1)
5446                     i.suffix = SHORT_MNEM_SUFFIX;
5447                   else
5448                     i.suffix = LONG_MNEM_SUFFIX;
5449                   mnem_p[-1] = '\0';
5450                   current_templates
5451                     = (const templates *) str_hash_find (op_hash, mnemonic);
5452                 }
5453               break;
5454             }
5455         }
5456
5457       if (!current_templates)
5458         {
5459           as_bad (_("no such instruction: `%s'"), token_start);
5460           return NULL;
5461         }
5462     }
5463
5464   if (current_templates->start->opcode_modifier.jump == JUMP
5465       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5466     {
5467       /* Check for a branch hint.  We allow ",pt" and ",pn" for
5468          predict taken and predict not taken respectively.
5469          I'm not sure that branch hints actually do anything on loop
5470          and jcxz insns (JumpByte) for current Pentium4 chips.  They
5471          may work in the future and it doesn't hurt to accept them
5472          now.  */
5473       if (l[0] == ',' && l[1] == 'p')
5474         {
5475           if (l[2] == 't')
5476             {
5477               if (!add_prefix (DS_PREFIX_OPCODE))
5478                 return NULL;
5479               l += 3;
5480             }
5481           else if (l[2] == 'n')
5482             {
5483               if (!add_prefix (CS_PREFIX_OPCODE))
5484                 return NULL;
5485               l += 3;
5486             }
5487         }
5488     }
5489   /* Any other comma loses.  */
5490   if (*l == ',')
5491     {
5492       as_bad (_("invalid character %s in mnemonic"),
5493               output_invalid (*l));
5494       return NULL;
5495     }
5496
5497   /* Check if instruction is supported on specified architecture.  */
5498   supported = 0;
5499   for (t = current_templates->start; t < current_templates->end; ++t)
5500     {
5501       supported |= cpu_flags_match (t);
5502       if (supported == CPU_FLAGS_PERFECT_MATCH)
5503         {
5504           if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
5505             as_warn (_("use .code16 to ensure correct addressing mode"));
5506
5507           return l;
5508         }
5509     }
5510
5511   if (!(supported & CPU_FLAGS_64BIT_MATCH))
5512     as_bad (flag_code == CODE_64BIT
5513             ? _("`%s' is not supported in 64-bit mode")
5514             : _("`%s' is only supported in 64-bit mode"),
5515             current_templates->start->name);
5516   else
5517     as_bad (_("`%s' is not supported on `%s%s'"),
5518             current_templates->start->name,
5519             cpu_arch_name ? cpu_arch_name : default_arch,
5520             cpu_sub_arch_name ? cpu_sub_arch_name : "");
5521
5522   return NULL;
5523 }
5524
5525 static char *
5526 parse_operands (char *l, const char *mnemonic)
5527 {
5528   char *token_start;
5529
5530   /* 1 if operand is pending after ','.  */
5531   unsigned int expecting_operand = 0;
5532
5533   while (*l != END_OF_INSN)
5534     {
5535       /* Non-zero if operand parens not balanced.  */
5536       unsigned int paren_not_balanced = 0;
5537       /* True if inside double quotes.  */
5538       bool in_quotes = false;
5539
5540       /* Skip optional white space before operand.  */
5541       if (is_space_char (*l))
5542         ++l;
5543       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5544         {
5545           as_bad (_("invalid character %s before operand %d"),
5546                   output_invalid (*l),
5547                   i.operands + 1);
5548           return NULL;
5549         }
5550       token_start = l;  /* After white space.  */
5551       while (in_quotes || paren_not_balanced || *l != ',')
5552         {
5553           if (*l == END_OF_INSN)
5554             {
5555               if (in_quotes)
5556                 {
5557                   as_bad (_("unbalanced double quotes in operand %d."),
5558                           i.operands + 1);
5559                   return NULL;
5560                 }
5561               if (paren_not_balanced)
5562                 {
5563                   know (!intel_syntax);
5564                   as_bad (_("unbalanced parenthesis in operand %d."),
5565                           i.operands + 1);
5566                   return NULL;
5567                 }
5568               else
5569                 break;  /* we are done */
5570             }
5571           else if (*l == '\\' && l[1] == '"')
5572             ++l;
5573           else if (*l == '"')
5574             in_quotes = !in_quotes;
5575           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5576             {
5577               as_bad (_("invalid character %s in operand %d"),
5578                       output_invalid (*l),
5579                       i.operands + 1);
5580               return NULL;
5581             }
5582           if (!intel_syntax && !in_quotes)
5583             {
5584               if (*l == '(')
5585                 ++paren_not_balanced;
5586               if (*l == ')')
5587                 --paren_not_balanced;
5588             }
5589           l++;
5590         }
5591       if (l != token_start)
5592         {                       /* Yes, we've read in another operand.  */
5593           unsigned int operand_ok;
5594           this_operand = i.operands++;
5595           if (i.operands > MAX_OPERANDS)
5596             {
5597               as_bad (_("spurious operands; (%d operands/instruction max)"),
5598                       MAX_OPERANDS);
5599               return NULL;
5600             }
5601           i.types[this_operand].bitfield.unspecified = 1;
5602           /* Now parse operand adding info to 'i' as we go along.  */
5603           END_STRING_AND_SAVE (l);
5604
5605           if (i.mem_operands > 1)
5606             {
5607               as_bad (_("too many memory references for `%s'"),
5608                       mnemonic);
5609               return 0;
5610             }
5611
5612           if (intel_syntax)
5613             operand_ok =
5614               i386_intel_operand (token_start,
5615                                   intel_float_operand (mnemonic));
5616           else
5617             operand_ok = i386_att_operand (token_start);
5618
5619           RESTORE_END_STRING (l);
5620           if (!operand_ok)
5621             return NULL;
5622         }
5623       else
5624         {
5625           if (expecting_operand)
5626             {
5627             expecting_operand_after_comma:
5628               as_bad (_("expecting operand after ','; got nothing"));
5629               return NULL;
5630             }
5631           if (*l == ',')
5632             {
5633               as_bad (_("expecting operand before ','; got nothing"));
5634               return NULL;
5635             }
5636         }
5637
5638       /* Now *l must be either ',' or END_OF_INSN.  */
5639       if (*l == ',')
5640         {
5641           if (*++l == END_OF_INSN)
5642             {
5643               /* Just skip it, if it's \n complain.  */
5644               goto expecting_operand_after_comma;
5645             }
5646           expecting_operand = 1;
5647         }
5648     }
5649   return l;
5650 }
5651
5652 static void
5653 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5654 {
5655   union i386_op temp_op;
5656   i386_operand_type temp_type;
5657   unsigned int temp_flags;
5658   enum bfd_reloc_code_real temp_reloc;
5659
5660   temp_type = i.types[xchg2];
5661   i.types[xchg2] = i.types[xchg1];
5662   i.types[xchg1] = temp_type;
5663
5664   temp_flags = i.flags[xchg2];
5665   i.flags[xchg2] = i.flags[xchg1];
5666   i.flags[xchg1] = temp_flags;
5667
5668   temp_op = i.op[xchg2];
5669   i.op[xchg2] = i.op[xchg1];
5670   i.op[xchg1] = temp_op;
5671
5672   temp_reloc = i.reloc[xchg2];
5673   i.reloc[xchg2] = i.reloc[xchg1];
5674   i.reloc[xchg1] = temp_reloc;
5675
5676   if (i.mask.reg)
5677     {
5678       if (i.mask.operand == xchg1)
5679         i.mask.operand = xchg2;
5680       else if (i.mask.operand == xchg2)
5681         i.mask.operand = xchg1;
5682     }
5683   if (i.broadcast.type)
5684     {
5685       if (i.broadcast.operand == xchg1)
5686         i.broadcast.operand = xchg2;
5687       else if (i.broadcast.operand == xchg2)
5688         i.broadcast.operand = xchg1;
5689     }
5690   if (i.rounding.type != rc_none)
5691     {
5692       if (i.rounding.operand == xchg1)
5693         i.rounding.operand = xchg2;
5694       else if (i.rounding.operand == xchg2)
5695         i.rounding.operand = xchg1;
5696     }
5697 }
5698
5699 static void
5700 swap_operands (void)
5701 {
5702   switch (i.operands)
5703     {
5704     case 5:
5705     case 4:
5706       swap_2_operands (1, i.operands - 2);
5707       /* Fall through.  */
5708     case 3:
5709     case 2:
5710       swap_2_operands (0, i.operands - 1);
5711       break;
5712     default:
5713       abort ();
5714     }
5715
5716   if (i.mem_operands == 2)
5717     {
5718       const reg_entry *temp_seg;
5719       temp_seg = i.seg[0];
5720       i.seg[0] = i.seg[1];
5721       i.seg[1] = temp_seg;
5722     }
5723 }
5724
5725 /* Try to ensure constant immediates are represented in the smallest
5726    opcode possible.  */
5727 static void
5728 optimize_imm (void)
5729 {
5730   char guess_suffix = 0;
5731   int op;
5732
5733   if (i.suffix)
5734     guess_suffix = i.suffix;
5735   else if (i.reg_operands)
5736     {
5737       /* Figure out a suffix from the last register operand specified.
5738          We can't do this properly yet, i.e. excluding special register
5739          instances, but the following works for instructions with
5740          immediates.  In any case, we can't set i.suffix yet.  */
5741       for (op = i.operands; --op >= 0;)
5742         if (i.types[op].bitfield.class != Reg)
5743           continue;
5744         else if (i.types[op].bitfield.byte)
5745           {
5746             guess_suffix = BYTE_MNEM_SUFFIX;
5747             break;
5748           }
5749         else if (i.types[op].bitfield.word)
5750           {
5751             guess_suffix = WORD_MNEM_SUFFIX;
5752             break;
5753           }
5754         else if (i.types[op].bitfield.dword)
5755           {
5756             guess_suffix = LONG_MNEM_SUFFIX;
5757             break;
5758           }
5759         else if (i.types[op].bitfield.qword)
5760           {
5761             guess_suffix = QWORD_MNEM_SUFFIX;
5762             break;
5763           }
5764     }
5765   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5766     guess_suffix = WORD_MNEM_SUFFIX;
5767
5768   for (op = i.operands; --op >= 0;)
5769     if (operand_type_check (i.types[op], imm))
5770       {
5771         switch (i.op[op].imms->X_op)
5772           {
5773           case O_constant:
5774             /* If a suffix is given, this operand may be shortened.  */
5775             switch (guess_suffix)
5776               {
5777               case LONG_MNEM_SUFFIX:
5778                 i.types[op].bitfield.imm32 = 1;
5779                 i.types[op].bitfield.imm64 = 1;
5780                 break;
5781               case WORD_MNEM_SUFFIX:
5782                 i.types[op].bitfield.imm16 = 1;
5783                 i.types[op].bitfield.imm32 = 1;
5784                 i.types[op].bitfield.imm32s = 1;
5785                 i.types[op].bitfield.imm64 = 1;
5786                 break;
5787               case BYTE_MNEM_SUFFIX:
5788                 i.types[op].bitfield.imm8 = 1;
5789                 i.types[op].bitfield.imm8s = 1;
5790                 i.types[op].bitfield.imm16 = 1;
5791                 i.types[op].bitfield.imm32 = 1;
5792                 i.types[op].bitfield.imm32s = 1;
5793                 i.types[op].bitfield.imm64 = 1;
5794                 break;
5795               }
5796
5797             /* If this operand is at most 16 bits, convert it
5798                to a signed 16 bit number before trying to see
5799                whether it will fit in an even smaller size.
5800                This allows a 16-bit operand such as $0xffe0 to
5801                be recognised as within Imm8S range.  */
5802             if ((i.types[op].bitfield.imm16)
5803                 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
5804               {
5805                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5806                                                 ^ 0x8000) - 0x8000);
5807               }
5808 #ifdef BFD64
5809             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5810             if ((i.types[op].bitfield.imm32)
5811                 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
5812               {
5813                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5814                                                 ^ ((offsetT) 1 << 31))
5815                                                - ((offsetT) 1 << 31));
5816               }
5817 #endif
5818             i.types[op]
5819               = operand_type_or (i.types[op],
5820                                  smallest_imm_type (i.op[op].imms->X_add_number));
5821
5822             /* We must avoid matching of Imm32 templates when 64bit
5823                only immediate is available.  */
5824             if (guess_suffix == QWORD_MNEM_SUFFIX)
5825               i.types[op].bitfield.imm32 = 0;
5826             break;
5827
5828           case O_absent:
5829           case O_register:
5830             abort ();
5831
5832             /* Symbols and expressions.  */
5833           default:
5834             /* Convert symbolic operand to proper sizes for matching, but don't
5835                prevent matching a set of insns that only supports sizes other
5836                than those matching the insn suffix.  */
5837             {
5838               i386_operand_type mask, allowed;
5839               const insn_template *t = current_templates->start;
5840
5841               operand_type_set (&mask, 0);
5842               allowed = t->operand_types[op];
5843
5844               while (++t < current_templates->end)
5845                 {
5846                   allowed = operand_type_and (allowed, anyimm);
5847                   allowed = operand_type_or (allowed, t->operand_types[op]);
5848                 }
5849               switch (guess_suffix)
5850                 {
5851                 case QWORD_MNEM_SUFFIX:
5852                   mask.bitfield.imm64 = 1;
5853                   mask.bitfield.imm32s = 1;
5854                   break;
5855                 case LONG_MNEM_SUFFIX:
5856                   mask.bitfield.imm32 = 1;
5857                   break;
5858                 case WORD_MNEM_SUFFIX:
5859                   mask.bitfield.imm16 = 1;
5860                   break;
5861                 case BYTE_MNEM_SUFFIX:
5862                   mask.bitfield.imm8 = 1;
5863                   break;
5864                 default:
5865                   break;
5866                 }
5867               allowed = operand_type_and (mask, allowed);
5868               if (!operand_type_all_zero (&allowed))
5869                 i.types[op] = operand_type_and (i.types[op], mask);
5870             }
5871             break;
5872           }
5873       }
5874 }
5875
5876 /* Try to use the smallest displacement type too.  */
5877 static void
5878 optimize_disp (void)
5879 {
5880   int op;
5881
5882   for (op = i.operands; --op >= 0;)
5883     if (operand_type_check (i.types[op], disp))
5884       {
5885         if (i.op[op].disps->X_op == O_constant)
5886           {
5887             offsetT op_disp = i.op[op].disps->X_add_number;
5888
5889             if (!op_disp && i.types[op].bitfield.baseindex)
5890               {
5891                 i.types[op].bitfield.disp8 = 0;
5892                 i.types[op].bitfield.disp16 = 0;
5893                 i.types[op].bitfield.disp32 = 0;
5894                 i.types[op].bitfield.disp32s = 0;
5895                 i.types[op].bitfield.disp64 = 0;
5896                 i.op[op].disps = 0;
5897                 i.disp_operands--;
5898                 continue;
5899               }
5900
5901             if (i.types[op].bitfield.disp16
5902                 && fits_in_unsigned_word (op_disp))
5903               {
5904                 /* If this operand is at most 16 bits, convert
5905                    to a signed 16 bit number and don't use 64bit
5906                    displacement.  */
5907                 op_disp = ((op_disp ^ 0x8000) - 0x8000);
5908                 i.types[op].bitfield.disp64 = 0;
5909               }
5910
5911 #ifdef BFD64
5912             if (flag_code == CODE_64BIT)
5913               {
5914                 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5915                 if ((i.types[op].bitfield.disp32
5916                      || want_disp32 (current_templates->start))
5917                     && fits_in_unsigned_long (op_disp))
5918                   {
5919                     /* If this operand is at most 32 bits, convert
5920                        to a signed 32 bit number and don't use 64bit
5921                        displacement.  */
5922                     op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5923                     i.types[op].bitfield.disp64 = 0;
5924                     i.types[op].bitfield.disp32 = 1;
5925                   }
5926
5927                 if (fits_in_signed_long (op_disp))
5928                   {
5929                     i.types[op].bitfield.disp64 = 0;
5930                     i.types[op].bitfield.disp32s = 1;
5931                   }
5932               }
5933 #endif
5934             if ((i.types[op].bitfield.disp32
5935                  || i.types[op].bitfield.disp32s
5936                  || i.types[op].bitfield.disp16)
5937                 && fits_in_disp8 (op_disp))
5938               i.types[op].bitfield.disp8 = 1;
5939
5940             i.op[op].disps->X_add_number = op_disp;
5941           }
5942         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5943                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5944           {
5945             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5946                          i.op[op].disps, 0, i.reloc[op]);
5947             i.types[op].bitfield.disp8 = 0;
5948             i.types[op].bitfield.disp16 = 0;
5949             i.types[op].bitfield.disp32 = 0;
5950             i.types[op].bitfield.disp32s = 0;
5951             i.types[op].bitfield.disp64 = 0;
5952           }
5953         else
5954           /* We only support 64bit displacement on constants.  */
5955           i.types[op].bitfield.disp64 = 0;
5956       }
5957 }
5958
5959 /* Return 1 if there is a match in broadcast bytes between operand
5960    GIVEN and instruction template T.   */
5961
5962 static INLINE int
5963 match_broadcast_size (const insn_template *t, unsigned int given)
5964 {
5965   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5966            && i.types[given].bitfield.byte)
5967           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5968               && i.types[given].bitfield.word)
5969           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5970               && i.types[given].bitfield.dword)
5971           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5972               && i.types[given].bitfield.qword));
5973 }
5974
5975 /* Check if operands are valid for the instruction.  */
5976
5977 static int
5978 check_VecOperands (const insn_template *t)
5979 {
5980   unsigned int op;
5981   i386_cpu_flags cpu;
5982
5983   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
5984      any one operand are implicity requiring AVX512VL support if the actual
5985      operand size is YMMword or XMMword.  Since this function runs after
5986      template matching, there's no need to check for YMMword/XMMword in
5987      the template.  */
5988   cpu = cpu_flags_and (t->cpu_flags, avx512);
5989   if (!cpu_flags_all_zero (&cpu)
5990       && !t->cpu_flags.bitfield.cpuavx512vl
5991       && !cpu_arch_flags.bitfield.cpuavx512vl)
5992     {
5993       for (op = 0; op < t->operands; ++op)
5994         {
5995           if (t->operand_types[op].bitfield.zmmword
5996               && (i.types[op].bitfield.ymmword
5997                   || i.types[op].bitfield.xmmword))
5998             {
5999               i.error = unsupported;
6000               return 1;
6001             }
6002         }
6003     }
6004
6005   /* Without VSIB byte, we can't have a vector register for index.  */
6006   if (!t->opcode_modifier.sib
6007       && i.index_reg
6008       && (i.index_reg->reg_type.bitfield.xmmword
6009           || i.index_reg->reg_type.bitfield.ymmword
6010           || i.index_reg->reg_type.bitfield.zmmword))
6011     {
6012       i.error = unsupported_vector_index_register;
6013       return 1;
6014     }
6015
6016   /* Check if default mask is allowed.  */
6017   if (t->opcode_modifier.nodefmask
6018       && (!i.mask.reg || i.mask.reg->reg_num == 0))
6019     {
6020       i.error = no_default_mask;
6021       return 1;
6022     }
6023
6024   /* For VSIB byte, we need a vector register for index, and all vector
6025      registers must be distinct.  */
6026   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6027     {
6028       if (!i.index_reg
6029           || !((t->opcode_modifier.sib == VECSIB128
6030                 && i.index_reg->reg_type.bitfield.xmmword)
6031                || (t->opcode_modifier.sib == VECSIB256
6032                    && i.index_reg->reg_type.bitfield.ymmword)
6033                || (t->opcode_modifier.sib == VECSIB512
6034                    && i.index_reg->reg_type.bitfield.zmmword)))
6035       {
6036         i.error = invalid_vsib_address;
6037         return 1;
6038       }
6039
6040       gas_assert (i.reg_operands == 2 || i.mask.reg);
6041       if (i.reg_operands == 2 && !i.mask.reg)
6042         {
6043           gas_assert (i.types[0].bitfield.class == RegSIMD);
6044           gas_assert (i.types[0].bitfield.xmmword
6045                       || i.types[0].bitfield.ymmword);
6046           gas_assert (i.types[2].bitfield.class == RegSIMD);
6047           gas_assert (i.types[2].bitfield.xmmword
6048                       || i.types[2].bitfield.ymmword);
6049           if (operand_check == check_none)
6050             return 0;
6051           if (register_number (i.op[0].regs)
6052               != register_number (i.index_reg)
6053               && register_number (i.op[2].regs)
6054                  != register_number (i.index_reg)
6055               && register_number (i.op[0].regs)
6056                  != register_number (i.op[2].regs))
6057             return 0;
6058           if (operand_check == check_error)
6059             {
6060               i.error = invalid_vector_register_set;
6061               return 1;
6062             }
6063           as_warn (_("mask, index, and destination registers should be distinct"));
6064         }
6065       else if (i.reg_operands == 1 && i.mask.reg)
6066         {
6067           if (i.types[1].bitfield.class == RegSIMD
6068               && (i.types[1].bitfield.xmmword
6069                   || i.types[1].bitfield.ymmword
6070                   || i.types[1].bitfield.zmmword)
6071               && (register_number (i.op[1].regs)
6072                   == register_number (i.index_reg)))
6073             {
6074               if (operand_check == check_error)
6075                 {
6076                   i.error = invalid_vector_register_set;
6077                   return 1;
6078                 }
6079               if (operand_check != check_none)
6080                 as_warn (_("index and destination registers should be distinct"));
6081             }
6082         }
6083     }
6084
6085   /* For AMX instructions with three tmmword operands, all tmmword operand must be
6086      distinct */
6087   if (t->operand_types[0].bitfield.tmmword
6088       && i.reg_operands == 3)
6089     {
6090       if (register_number (i.op[0].regs)
6091           == register_number (i.op[1].regs)
6092           || register_number (i.op[0].regs)
6093              == register_number (i.op[2].regs)
6094           || register_number (i.op[1].regs)
6095              == register_number (i.op[2].regs))
6096         {
6097           i.error = invalid_tmm_register_set;
6098           return 1;
6099         }
6100     }
6101
6102   /* Check if broadcast is supported by the instruction and is applied
6103      to the memory operand.  */
6104   if (i.broadcast.type)
6105     {
6106       i386_operand_type type, overlap;
6107
6108       /* Check if specified broadcast is supported in this instruction,
6109          and its broadcast bytes match the memory operand.  */
6110       op = i.broadcast.operand;
6111       if (!t->opcode_modifier.broadcast
6112           || !(i.flags[op] & Operand_Mem)
6113           || (!i.types[op].bitfield.unspecified
6114               && !match_broadcast_size (t, op)))
6115         {
6116         bad_broadcast:
6117           i.error = unsupported_broadcast;
6118           return 1;
6119         }
6120
6121       i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6122                            * i.broadcast.type);
6123       operand_type_set (&type, 0);
6124       switch (i.broadcast.bytes)
6125         {
6126         case 2:
6127           type.bitfield.word = 1;
6128           break;
6129         case 4:
6130           type.bitfield.dword = 1;
6131           break;
6132         case 8:
6133           type.bitfield.qword = 1;
6134           break;
6135         case 16:
6136           type.bitfield.xmmword = 1;
6137           break;
6138         case 32:
6139           type.bitfield.ymmword = 1;
6140           break;
6141         case 64:
6142           type.bitfield.zmmword = 1;
6143           break;
6144         default:
6145           goto bad_broadcast;
6146         }
6147
6148       overlap = operand_type_and (type, t->operand_types[op]);
6149       if (t->operand_types[op].bitfield.class == RegSIMD
6150           && t->operand_types[op].bitfield.byte
6151              + t->operand_types[op].bitfield.word
6152              + t->operand_types[op].bitfield.dword
6153              + t->operand_types[op].bitfield.qword > 1)
6154         {
6155           overlap.bitfield.xmmword = 0;
6156           overlap.bitfield.ymmword = 0;
6157           overlap.bitfield.zmmword = 0;
6158         }
6159       if (operand_type_all_zero (&overlap))
6160           goto bad_broadcast;
6161
6162       if (t->opcode_modifier.checkregsize)
6163         {
6164           unsigned int j;
6165
6166           type.bitfield.baseindex = 1;
6167           for (j = 0; j < i.operands; ++j)
6168             {
6169               if (j != op
6170                   && !operand_type_register_match(i.types[j],
6171                                                   t->operand_types[j],
6172                                                   type,
6173                                                   t->operand_types[op]))
6174                 goto bad_broadcast;
6175             }
6176         }
6177     }
6178   /* If broadcast is supported in this instruction, we need to check if
6179      operand of one-element size isn't specified without broadcast.  */
6180   else if (t->opcode_modifier.broadcast && i.mem_operands)
6181     {
6182       /* Find memory operand.  */
6183       for (op = 0; op < i.operands; op++)
6184         if (i.flags[op] & Operand_Mem)
6185           break;
6186       gas_assert (op < i.operands);
6187       /* Check size of the memory operand.  */
6188       if (match_broadcast_size (t, op))
6189         {
6190           i.error = broadcast_needed;
6191           return 1;
6192         }
6193     }
6194   else
6195     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
6196
6197   /* Check if requested masking is supported.  */
6198   if (i.mask.reg)
6199     {
6200       switch (t->opcode_modifier.masking)
6201         {
6202         case BOTH_MASKING:
6203           break;
6204         case MERGING_MASKING:
6205           if (i.mask.zeroing)
6206             {
6207         case 0:
6208               i.error = unsupported_masking;
6209               return 1;
6210             }
6211           break;
6212         case DYNAMIC_MASKING:
6213           /* Memory destinations allow only merging masking.  */
6214           if (i.mask.zeroing && i.mem_operands)
6215             {
6216               /* Find memory operand.  */
6217               for (op = 0; op < i.operands; op++)
6218                 if (i.flags[op] & Operand_Mem)
6219                   break;
6220               gas_assert (op < i.operands);
6221               if (op == i.operands - 1)
6222                 {
6223                   i.error = unsupported_masking;
6224                   return 1;
6225                 }
6226             }
6227           break;
6228         default:
6229           abort ();
6230         }
6231     }
6232
6233   /* Check if masking is applied to dest operand.  */
6234   if (i.mask.reg && (i.mask.operand != i.operands - 1))
6235     {
6236       i.error = mask_not_on_destination;
6237       return 1;
6238     }
6239
6240   /* Check RC/SAE.  */
6241   if (i.rounding.type != rc_none)
6242     {
6243       if (!t->opcode_modifier.sae
6244           || (i.rounding.type != saeonly && !t->opcode_modifier.staticrounding))
6245         {
6246           i.error = unsupported_rc_sae;
6247           return 1;
6248         }
6249       /* If the instruction has several immediate operands and one of
6250          them is rounding, the rounding operand should be the last
6251          immediate operand.  */
6252       if (i.imm_operands > 1
6253           && i.rounding.operand != i.imm_operands - 1)
6254         {
6255           i.error = rc_sae_operand_not_last_imm;
6256           return 1;
6257         }
6258     }
6259
6260   /* Check the special Imm4 cases; must be the first operand.  */
6261   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6262     {
6263       if (i.op[0].imms->X_op != O_constant
6264           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6265         {
6266           i.error = bad_imm4;
6267           return 1;
6268         }
6269
6270       /* Turn off Imm<N> so that update_imm won't complain.  */
6271       operand_type_set (&i.types[0], 0);
6272     }
6273
6274   /* Check vector Disp8 operand.  */
6275   if (t->opcode_modifier.disp8memshift
6276       && i.disp_encoding != disp_encoding_32bit)
6277     {
6278       if (i.broadcast.type)
6279         i.memshift = t->opcode_modifier.broadcast - 1;
6280       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6281         i.memshift = t->opcode_modifier.disp8memshift;
6282       else
6283         {
6284           const i386_operand_type *type = NULL;
6285
6286           i.memshift = 0;
6287           for (op = 0; op < i.operands; op++)
6288             if (i.flags[op] & Operand_Mem)
6289               {
6290                 if (t->opcode_modifier.evex == EVEXLIG)
6291                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6292                 else if (t->operand_types[op].bitfield.xmmword
6293                          + t->operand_types[op].bitfield.ymmword
6294                          + t->operand_types[op].bitfield.zmmword <= 1)
6295                   type = &t->operand_types[op];
6296                 else if (!i.types[op].bitfield.unspecified)
6297                   type = &i.types[op];
6298               }
6299             else if (i.types[op].bitfield.class == RegSIMD
6300                      && t->opcode_modifier.evex != EVEXLIG)
6301               {
6302                 if (i.types[op].bitfield.zmmword)
6303                   i.memshift = 6;
6304                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6305                   i.memshift = 5;
6306                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6307                   i.memshift = 4;
6308               }
6309
6310           if (type)
6311             {
6312               if (type->bitfield.zmmword)
6313                 i.memshift = 6;
6314               else if (type->bitfield.ymmword)
6315                 i.memshift = 5;
6316               else if (type->bitfield.xmmword)
6317                 i.memshift = 4;
6318             }
6319
6320           /* For the check in fits_in_disp8().  */
6321           if (i.memshift == 0)
6322             i.memshift = -1;
6323         }
6324
6325       for (op = 0; op < i.operands; op++)
6326         if (operand_type_check (i.types[op], disp)
6327             && i.op[op].disps->X_op == O_constant)
6328           {
6329             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6330               {
6331                 i.types[op].bitfield.disp8 = 1;
6332                 return 0;
6333               }
6334             i.types[op].bitfield.disp8 = 0;
6335           }
6336     }
6337
6338   i.memshift = 0;
6339
6340   return 0;
6341 }
6342
6343 /* Decide whether template T is compatible with the vector encoding
6344    requested for the instruction being assembled (i.vec_encoding).
6345    Return 0 when it is.  Otherwise store `unsupported' in i.error
6346    and return 1.  */
6347
6348 static int
6349 VEX_check_encoding (const insn_template *t)
6350 {
6351   bool rejected;
6352
6353   if (i.vec_encoding == vex_encoding_error)
6354     {
6355       /* An encoding of vex_encoding_error never matches any template.  */
6356       rejected = true;
6357     }
6358   else if (i.vec_encoding == vex_encoding_evex)
6359     {
6360       /* The insn must be encoded with an EVEX prefix: only templates
6361          for which is_evex_encoding () holds qualify.  */
6362       rejected = !is_evex_encoding (t);
6363     }
6364   else
6365     {
6366       /* A template without the VEX modifier is acceptable only when
6367          the default encoding is in effect; one with it always is.  */
6368       rejected = (!t->opcode_modifier.vex
6369                   && i.vec_encoding != vex_encoding_default);
6370     }
6371
6372   if (!rejected)
6373     return 0;
6374
6375   i.error = unsupported;
6376   return 1;
6377 }
6378
6379 static const insn_template *
6380 match_template (char mnem_suffix)
6381 {
       /* Overview: walk the candidate templates in current_templates and
          pick the first one that is compatible with the instruction
          currently held in `i' (operand count, CPU flags, syntax, ISA,
          mnemonic suffix, operand sizes and types, vector operands and
          VEX/EVEX encoding).  MNEM_SUFFIX (or, in the default case,
          i.suffix) selects which no_?suf template attribute makes a
          template ineligible.

          On success install_template () is called for the match, the
          locally adjusted Disp operand type and, for a reverse match, the
          opcode direction and swapped operand types are patched into
          i.tm, and the matching template is returned.  On failure a
          diagnostic is issued with as_bad () and NULL is returned.  */
6382   /* Points to template once we've found it.  */
6383   const insn_template *t;
6384   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6385   i386_operand_type overlap4;
6386   unsigned int found_reverse_match;
6387   i386_opcode_modifier suffix_check;
6388   i386_operand_type operand_types [MAX_OPERANDS];
6389   int addr_prefix_disp;
6390   unsigned int j, size_match, check_register;
       /* Error left by check_VecOperands () / VEX_check_encoding () for a
          template that passed all earlier tests; preferred over i.error
          when a failed match gets reported.  */
6391   enum i386_error specific_error = 0;
6392
6393 #if MAX_OPERANDS != 5
6394 # error "MAX_OPERANDS must be 5."
6395 #endif
6396
6397   found_reverse_match = 0;
6398   addr_prefix_disp = -1;
6399
6400   /* Prepare for mnemonic suffix check.  */
6401   memset (&suffix_check, 0, sizeof (suffix_check));
       /* Set the no_?suf bit (if any) corresponding to the suffix in use;
          a template with the same bit set is skipped in the loop below.  */
6402   switch (mnem_suffix)
6403     {
6404     case BYTE_MNEM_SUFFIX:
6405       suffix_check.no_bsuf = 1;
6406       break;
6407     case WORD_MNEM_SUFFIX:
6408       suffix_check.no_wsuf = 1;
6409       break;
6410     case SHORT_MNEM_SUFFIX:
6411       suffix_check.no_ssuf = 1;
6412       break;
6413     case LONG_MNEM_SUFFIX:
6414       suffix_check.no_lsuf = 1;
6415       break;
6416     case QWORD_MNEM_SUFFIX:
6417       suffix_check.no_qsuf = 1;
6418       break;
6419     default:
6420       /* NB: In Intel syntax, normally we can check for memory operand
6421          size when there is no mnemonic suffix.  But jmp and call have
6422          2 different encodings with Dword memory operand size, one with
6423          No_ldSuf and the other without.  i.suffix is set to
6424          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
6425       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
6426         suffix_check.no_ldsuf = 1;
6427     }
6428
6429   /* Must have right number of operands.  */
6430   i.error = number_of_operands_mismatch;
6431
6432   for (t = current_templates->start; t < current_templates->end; t++)
6433     {
           /* Start from a clean slate for every candidate.  */
6434       addr_prefix_disp = -1;
6435       found_reverse_match = 0;
6436
6437       if (i.operands != t->operands)
6438         continue;
6439
           /* NB: i.error is assigned ahead of each of the following tests,
              so that a template rejected by a test leaves the matching
              error code behind.  */
6440       /* Check processor support.  */
6441       i.error = unsupported;
6442       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6443         continue;
6444
6445       /* Check Pseudo Prefix.  */
6446       i.error = unsupported;
6447       if (t->opcode_modifier.pseudovexprefix
6448           && !(i.vec_encoding == vex_encoding_vex
6449               || i.vec_encoding == vex_encoding_vex3))
6450         continue;
6451
6452       /* Check AT&T mnemonic.   */
6453       i.error = unsupported_with_intel_mnemonic;
6454       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6455         continue;
6456
6457       /* Check AT&T/Intel syntax.  */
6458       i.error = unsupported_syntax;
6459       if ((intel_syntax && t->opcode_modifier.attsyntax)
6460           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6461         continue;
6462
6463       /* Check Intel64/AMD64 ISA.   */
6464       switch (isa64)
6465         {
6466         default:
6467           /* Default: Don't accept Intel64.  */
6468           if (t->opcode_modifier.isa64 == INTEL64)
6469             continue;
6470           break;
6471         case amd64:
6472           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6473           if (t->opcode_modifier.isa64 >= INTEL64)
6474             continue;
6475           break;
6476         case intel64:
6477           /* -mintel64: Don't accept AMD64.  */
6478           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6479             continue;
6480           break;
6481         }
6482
6483       /* Check the suffix.  */
6484       i.error = invalid_instruction_suffix;
6485       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
6486           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
6487           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
6488           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
6489           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
6490           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
6491         continue;
6492
           /* size_match is tested against MATCH_STRAIGHT / MATCH_REVERSE
              further down; presumably it tells which operand order(s)
              satisfied the size check.  */
6493       size_match = operand_size_match (t);
6494       if (!size_match)
6495         continue;
6496
6497       /* This is intentionally not
6498
6499          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6500
6501          as the case of a missing * on the operand is accepted (perhaps with
6502          a warning, issued further down).  */
6503       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6504         {
6505           i.error = operand_type_mismatch;
6506           continue;
6507         }
6508
           /* Work on a local copy, so that the adjustments made below do
              not touch the template itself.  */
6509       for (j = 0; j < MAX_OPERANDS; j++)
6510         operand_types[j] = t->operand_types[j];
6511
6512       /* In general, don't allow
6513          - 64-bit operands outside of 64-bit mode,
6514          - 32-bit operands on pre-386.  */
           /* J is i.imm_operands + 1 if the template has more than one
              operand beyond i.imm_operands, else i.imm_operands.  */
6515       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6516       if (((i.suffix == QWORD_MNEM_SUFFIX
6517             && flag_code != CODE_64BIT
6518             && !(t->opcode_modifier.opcodespace == SPACE_0F
6519                  && t->base_opcode == 0xc7
6520                  && t->opcode_modifier.opcodeprefix == PREFIX_NONE
6521                  && t->extension_opcode == 1) /* cmpxchg8b */)
6522            || (i.suffix == LONG_MNEM_SUFFIX
6523                && !cpu_arch_flags.bitfield.cpui386))
6524           && (intel_syntax
6525               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6526                  && !intel_float_operand (t->name))
6527               : intel_float_operand (t->name) != 2)
6528           && (t->operands == i.imm_operands
6529               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6530                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6531                && operand_types[i.imm_operands].bitfield.class != RegMask)
6532               || (operand_types[j].bitfield.class != RegMMX
6533                   && operand_types[j].bitfield.class != RegSIMD
6534                   && operand_types[j].bitfield.class != RegMask))
6535           && !t->opcode_modifier.sib)
6536         continue;
6537
6538       /* Do not verify operands when there are none.  */
6539       if (!t->operands)
6540         {
6541           if (VEX_check_encoding (t))
6542             {
6543               specific_error = i.error;
6544               continue;
6545             }
6546
6547           /* We've found a match; break out of loop.  */
6548           break;
6549         }
6550
6551       if (!t->opcode_modifier.jump
6552           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6553         {
6554           /* There should be only one Disp operand.  */
6555           for (j = 0; j < MAX_OPERANDS; j++)
6556             if (operand_type_check (operand_types[j], disp))
6557               break;
6558           if (j < MAX_OPERANDS)
6559             {
6560               bool override = (i.prefix[ADDR_PREFIX] != 0);
6561
                   /* Remember which operand got adjusted; its type is
                      copied into i.tm once a template has been chosen.  */
6562               addr_prefix_disp = j;
6563
6564               /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
6565                  operand into Disp32/Disp32/Disp16/Disp32 operand.  */
6566               switch (flag_code)
6567                 {
6568                 case CODE_16BIT:
6569                   override = !override;
6570                   /* Fall through.  */
6571                 case CODE_32BIT:
6572                   if (operand_types[j].bitfield.disp32
6573                       && operand_types[j].bitfield.disp16)
6574                     {
6575                       operand_types[j].bitfield.disp16 = override;
6576                       operand_types[j].bitfield.disp32 = !override;
6577                     }
6578                   operand_types[j].bitfield.disp32s = 0;
6579                   operand_types[j].bitfield.disp64 = 0;
6580                   break;
6581
6582                 case CODE_64BIT:
6583                   if (operand_types[j].bitfield.disp32s
6584                       || operand_types[j].bitfield.disp64)
6585                     {
6586                       operand_types[j].bitfield.disp64 &= !override;
6587                       operand_types[j].bitfield.disp32s &= !override;
6588                       operand_types[j].bitfield.disp32 = override;
6589                     }
6590                   operand_types[j].bitfield.disp16 = 0;
6591                   break;
6592                 }
6593             }
6594         }
6595
6596       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6597       if (i.reloc[0] == BFD_RELOC_386_GOT32
6598           && t->base_opcode == 0xa0
6599           && t->opcode_modifier.opcodespace == SPACE_BASE)
6600         continue;
6601
6602       /* We check register size if needed.  */
           /* Bit N of check_register covers operand N; the broadcast
              operand, if any, is left out.  */
6603       if (t->opcode_modifier.checkregsize)
6604         {
6605           check_register = (1 << t->operands) - 1;
6606           if (i.broadcast.type)
6607             check_register &= ~(1 << i.broadcast.operand);
6608         }
6609       else
6610         check_register = 0;
6611
6612       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6613       switch (t->operands)
6614         {
6615         case 1:
6616           if (!operand_type_match (overlap0, i.types[0]))
6617             continue;
6618           break;
6619         case 2:
6620           /* xchg %eax, %eax is a special case. It is an alias for nop
6621              only in 32bit mode and we can use opcode 0x90.  In 64bit
6622              mode, we can't use 0x90 for xchg %eax, %eax since it should
6623              zero-extend %eax to %rax.  */
6624           if (flag_code == CODE_64BIT
6625               && t->base_opcode == 0x90
6626               && t->opcode_modifier.opcodespace == SPACE_BASE
6627               && i.types[0].bitfield.instance == Accum
6628               && i.types[0].bitfield.dword
6629               && i.types[1].bitfield.instance == Accum
6630               && i.types[1].bitfield.dword)
6631             continue;
6632           /* xrelease mov %eax, <disp> is another special case. It must not
6633              match the accumulator-only encoding of mov.  */
6634           if (flag_code != CODE_64BIT
6635               && i.hle_prefix
6636               && t->base_opcode == 0xa0
6637               && t->opcode_modifier.opcodespace == SPACE_BASE
6638               && i.types[0].bitfield.instance == Accum
6639               && (i.flags[1] & Operand_Mem))
6640             continue;
6641           /* Fall through.  */
6642
6643         case 3:
6644           if (!(size_match & MATCH_STRAIGHT))
6645             goto check_reverse;
6646           /* Reverse direction of operands if swapping is possible in the first
6647              place (operands need to be symmetric) and
6648              - the load form is requested, and the template is a store form,
6649              - the store form is requested, and the template is a load form,
6650              - the non-default (swapped) form is requested.  */
6651           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6652           if (t->opcode_modifier.d && i.reg_operands == i.operands
6653               && !operand_type_all_zero (&overlap1))
6654             switch (i.dir_encoding)
6655               {
6656               case dir_encoding_load:
6657                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6658                     || t->opcode_modifier.regmem)
6659                   goto check_reverse;
6660                 break;
6661
6662               case dir_encoding_store:
6663                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6664                     && !t->opcode_modifier.regmem)
6665                   goto check_reverse;
6666                 break;
6667
6668               case dir_encoding_swap:
6669                 goto check_reverse;
6670
6671               case dir_encoding_default:
6672                 break;
6673               }
6674           /* If we want store form, we skip the current load.  */
6675           if ((i.dir_encoding == dir_encoding_store
6676                || i.dir_encoding == dir_encoding_swap)
6677               && i.mem_operands == 0
6678               && t->opcode_modifier.load)
6679             continue;
6680           /* Fall through.  */
6681         case 4:
6682         case 5:
6683           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6684           if (!operand_type_match (overlap0, i.types[0])
6685               || !operand_type_match (overlap1, i.types[1])
6686               || ((check_register & 3) == 3
6687                   && !operand_type_register_match (i.types[0],
6688                                                    operand_types[0],
6689                                                    i.types[1],
6690                                                    operand_types[1])))
6691             {
6692               /* Check if other direction is valid ...  */
6693               if (!t->opcode_modifier.d)
6694                 continue;
6695
                 /* NB: Also entered via goto from the code under case 3
                    above (which case 2 falls through into).  */
6696             check_reverse:
6697               if (!(size_match & MATCH_REVERSE))
6698                 continue;
6699               /* Try reversing direction of operands.  */
6700               overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6701               overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6702               if (!operand_type_match (overlap0, i.types[0])
6703                   || !operand_type_match (overlap1, i.types[i.operands - 1])
6704                   || (check_register
6705                       && !operand_type_register_match (i.types[0],
6706                                                        operand_types[i.operands - 1],
6707                                                        i.types[i.operands - 1],
6708                                                        operand_types[0])))
6709                 {
6710                   /* Does not match either direction.  */
6711                   continue;
6712                 }
6713               /* found_reverse_match holds which of D or FloatR
6714                  we've found.  */
6715               if (!t->opcode_modifier.d)
6716                 found_reverse_match = 0;
6717               else if (operand_types[0].bitfield.tbyte)
6718                 found_reverse_match = Opcode_FloatD;
6719               else if (operand_types[0].bitfield.xmmword
6720                        || operand_types[i.operands - 1].bitfield.xmmword
6721                        || operand_types[0].bitfield.class == RegMMX
6722                        || operand_types[i.operands - 1].bitfield.class == RegMMX
6723                        || is_any_vex_encoding(t))
6724                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6725                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6726               else
6727                 found_reverse_match = Opcode_D;
6728               if (t->opcode_modifier.floatr)
6729                 found_reverse_match |= Opcode_FloatR;
6730             }
6731           else
6732             {
6733               /* Found a forward 2 operand match here.  */
                   /* Compute the overlaps for the remaining operands first;
                      they get checked by the second switch.  */
6734               switch (t->operands)
6735                 {
6736                 case 5:
6737                   overlap4 = operand_type_and (i.types[4],
6738                                                operand_types[4]);
6739                   /* Fall through.  */
6740                 case 4:
6741                   overlap3 = operand_type_and (i.types[3],
6742                                                operand_types[3]);
6743                   /* Fall through.  */
6744                 case 3:
6745                   overlap2 = operand_type_and (i.types[2],
6746                                                operand_types[2]);
6747                   break;
6748                 }
6749
6750               switch (t->operands)
6751                 {
6752                 case 5:
6753                   if (!operand_type_match (overlap4, i.types[4])
6754                       || !operand_type_register_match (i.types[3],
6755                                                        operand_types[3],
6756                                                        i.types[4],
6757                                                        operand_types[4]))
6758                     continue;
6759                   /* Fall through.  */
6760                 case 4:
6761                   if (!operand_type_match (overlap3, i.types[3])
6762                       || ((check_register & 0xa) == 0xa
6763                           && !operand_type_register_match (i.types[1],
6764                                                             operand_types[1],
6765                                                             i.types[3],
6766                                                             operand_types[3]))
6767                       || ((check_register & 0xc) == 0xc
6768                           && !operand_type_register_match (i.types[2],
6769                                                             operand_types[2],
6770                                                             i.types[3],
6771                                                             operand_types[3])))
6772                     continue;
6773                   /* Fall through.  */
6774                 case 3:
6775                   /* Here we make use of the fact that there are no
6776                      reverse match 3 operand instructions.  */
6777                   if (!operand_type_match (overlap2, i.types[2])
6778                       || ((check_register & 5) == 5
6779                           && !operand_type_register_match (i.types[0],
6780                                                             operand_types[0],
6781                                                             i.types[2],
6782                                                             operand_types[2]))
6783                       || ((check_register & 6) == 6
6784                           && !operand_type_register_match (i.types[1],
6785                                                             operand_types[1],
6786                                                             i.types[2],
6787                                                             operand_types[2])))
6788                     continue;
6789                   break;
6790                 }
6791             }
6792           /* Found either forward/reverse 2, 3, 4 or 5 operand match here:
6793              slip through to break.  */
6794         }
6795
6796       /* Check if vector operands are valid.  */
6797       if (check_VecOperands (t))
6798         {
6799           specific_error = i.error;
6800           continue;
6801         }
6802
6803       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
6804       if (VEX_check_encoding (t))
6805         {
6806           specific_error = i.error;
6807           continue;
6808         }
6809
6810       /* We've found a match; break out of loop.  */
6811       break;
6812     }
6813
6814   if (t == current_templates->end)
6815     {
6816       /* We found no match.  */
6817       const char *err_msg;
           /* An error recorded by the vector operand / encoding checks wins
              over the generic i.error.  */
6818       switch (specific_error ? specific_error : i.error)
6819         {
6820         default:
               /* Any error code not listed below is unexpected here.  */
6821           abort ();
6822         case operand_size_mismatch:
6823           err_msg = _("operand size mismatch");
6824           break;
6825         case operand_type_mismatch:
6826           err_msg = _("operand type mismatch");
6827           break;
6828         case register_type_mismatch:
6829           err_msg = _("register type mismatch");
6830           break;
6831         case number_of_operands_mismatch:
6832           err_msg = _("number of operands mismatch");
6833           break;
6834         case invalid_instruction_suffix:
6835           err_msg = _("invalid instruction suffix");
6836           break;
6837         case bad_imm4:
6838           err_msg = _("constant doesn't fit in 4 bits");
6839           break;
6840         case unsupported_with_intel_mnemonic:
6841           err_msg = _("unsupported with Intel mnemonic");
6842           break;
6843         case unsupported_syntax:
6844           err_msg = _("unsupported syntax");
6845           break;
6846         case unsupported:
               /* Uses a message format of its own, hence reported right
                  here rather than through err_msg.  */
6847           as_bad (_("unsupported instruction `%s'"),
6848                   current_templates->start->name);
6849           return NULL;
6850         case invalid_sib_address:
6851           err_msg = _("invalid SIB address");
6852           break;
6853         case invalid_vsib_address:
6854           err_msg = _("invalid VSIB address");
6855           break;
6856         case invalid_vector_register_set:
6857           err_msg = _("mask, index, and destination registers must be distinct");
6858           break;
6859         case invalid_tmm_register_set:
6860           err_msg = _("all tmm registers must be distinct");
6861           break;
6862         case unsupported_vector_index_register:
6863           err_msg = _("unsupported vector index register");
6864           break;
6865         case unsupported_broadcast:
6866           err_msg = _("unsupported broadcast");
6867           break;
6868         case broadcast_needed:
6869           err_msg = _("broadcast is needed for operand of such type");
6870           break;
6871         case unsupported_masking:
6872           err_msg = _("unsupported masking");
6873           break;
6874         case mask_not_on_destination:
6875           err_msg = _("mask not on destination operand");
6876           break;
6877         case no_default_mask:
6878           err_msg = _("default mask isn't allowed");
6879           break;
6880         case unsupported_rc_sae:
6881           err_msg = _("unsupported static rounding/sae");
6882           break;
6883         case rc_sae_operand_not_last_imm:
6884           if (intel_syntax)
6885             err_msg = _("RC/SAE operand must precede immediate operands");
6886           else
6887             err_msg = _("RC/SAE operand must follow immediate operands");
6888           break;
6889         case invalid_register_operand:
6890           err_msg = _("invalid register operand");
6891           break;
6892         }
6893       as_bad (_("%s for `%s'"), err_msg,
6894               current_templates->start->name);
6895       return NULL;
6896     }
6897
6898   if (!quiet_warnings)
6899     {
6900       if (!intel_syntax
6901           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6902         as_warn (_("indirect %s without `*'"), t->name);
6903
6904       if (t->opcode_modifier.isprefix
6905           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6906         {
6907           /* Warn them that a data or address size prefix doesn't
6908              affect assembly of the next line of code.  */
6909           as_warn (_("stand-alone `%s' prefix"), t->name);
6910         }
6911     }
6912
6913   /* Copy the template we found.  */
6914   install_template (t);
6915
       /* Carry over the Disp operand type as adjusted in the loop above.  */
6916   if (addr_prefix_disp != -1)
6917     i.tm.operand_types[addr_prefix_disp]
6918       = operand_types[addr_prefix_disp];
6919
6920   if (found_reverse_match)
6921     {
6922       /* If we found a reverse match we must alter the opcode direction
6923          bit and clear/flip the regmem modifier one.  found_reverse_match
6924          holds bits to change (different for int & float insns).  */
6925
6926       i.tm.base_opcode ^= found_reverse_match;
6927
           /* Swap the first and last operand types, taking them from the
              local (possibly Disp-adjusted) copy.  */
6928       i.tm.operand_types[0] = operand_types[i.operands - 1];
6929       i.tm.operand_types[i.operands - 1] = operand_types[0];
6930
6931       /* Certain SIMD insns have their load forms specified in the opcode
6932          table, and hence we need to _set_ RegMem instead of clearing it.
6933          We need to avoid setting the bit though on insns like KMOVW.  */
6934       i.tm.opcode_modifier.regmem
6935         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6936           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6937           && !i.tm.opcode_modifier.regmem;
6938     }
6939
6940   return t;
6941 }
6942
6943 /* Make sure the operand of the matched string insn which has to
6944    use %es is not given any other segment override.  Returns 1 when
6945    acceptable, 0 once an error has been reported.  */
6946 static int
6947 check_string (void)
6948 {
6949   unsigned int es_idx = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6950   unsigned int seg_idx = 0;
6951
6952   if (i.tm.operand_types[0].bitfield.baseindex)
6953     seg_idx = es_idx;
6954   if (i.seg[seg_idx] == NULL || i.seg[seg_idx] == reg_es)
6955     {
6956       /* Only one override per insn: move a (legal) override on the
6957          second operand to where non-string insns keep theirs.  */
6958       i.seg[seg_idx] = i.seg[1];
6959       return 1;
6960     }
6961   as_bad (_("`%s' operand %u must use `%ses' segment"), i.tm.name,
6962           intel_syntax ? i.tm.operands - es_idx : es_idx + 1,
6963           register_prefix);
6964   return 0;
6965 }
6966
6967 static int
6968 process_suffix (void)
6969 {
6970   bool is_crc32 = false, is_movx = false;
6971
6972   /* If matched instruction specifies an explicit instruction mnemonic
6973      suffix, use it.  */
6974   if (i.tm.opcode_modifier.size == SIZE16)
6975     i.suffix = WORD_MNEM_SUFFIX;
6976   else if (i.tm.opcode_modifier.size == SIZE32)
6977     i.suffix = LONG_MNEM_SUFFIX;
6978   else if (i.tm.opcode_modifier.size == SIZE64)
6979     i.suffix = QWORD_MNEM_SUFFIX;
6980   else if (i.reg_operands
6981            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
6982            && !i.tm.opcode_modifier.addrprefixopreg)
6983     {
6984       unsigned int numop = i.operands;
6985
6986       /* MOVSX/MOVZX */
6987       is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
6988                  && (i.tm.base_opcode | 8) == 0xbe)
6989                 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
6990                     && i.tm.base_opcode == 0x63
6991                     && i.tm.cpu_flags.bitfield.cpu64);
6992
6993       /* CRC32 */
6994       is_crc32 = (i.tm.base_opcode == 0xf0
6995                   && i.tm.opcode_modifier.opcodespace == SPACE_0F38
6996                   && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
6997
6998       /* movsx/movzx want only their source operand considered here, for the
6999          ambiguity checking below.  The suffix will be replaced afterwards
7000          to represent the destination (register).  */
7001       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7002         --i.operands;
7003
7004       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
7005       if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7006         i.rex |= REX_W;
7007
7008       /* If there's no instruction mnemonic suffix we try to invent one
7009          based on GPR operands.  */
7010       if (!i.suffix)
7011         {
7012           /* We take i.suffix from the last register operand specified,
7013              Destination register type is more significant than source
7014              register type.  crc32 in SSE4.2 prefers source register
7015              type. */
7016           unsigned int op = is_crc32 ? 1 : i.operands;
7017
7018           while (op--)
7019             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7020                 || i.tm.operand_types[op].bitfield.instance == Accum)
7021               {
7022                 if (i.types[op].bitfield.class != Reg)
7023                   continue;
7024                 if (i.types[op].bitfield.byte)
7025                   i.suffix = BYTE_MNEM_SUFFIX;
7026                 else if (i.types[op].bitfield.word)
7027                   i.suffix = WORD_MNEM_SUFFIX;
7028                 else if (i.types[op].bitfield.dword)
7029                   i.suffix = LONG_MNEM_SUFFIX;
7030                 else if (i.types[op].bitfield.qword)
7031                   i.suffix = QWORD_MNEM_SUFFIX;
7032                 else
7033                   continue;
7034                 break;
7035               }
7036
7037           /* As an exception, movsx/movzx silently default to a byte source
7038              in AT&T mode.  */
7039           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7040             i.suffix = BYTE_MNEM_SUFFIX;
7041         }
7042       else if (i.suffix == BYTE_MNEM_SUFFIX)
7043         {
7044           if (intel_syntax
7045               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7046               && i.tm.opcode_modifier.no_bsuf)
7047             i.suffix = 0;
7048           else if (!check_byte_reg ())
7049             return 0;
7050         }
7051       else if (i.suffix == LONG_MNEM_SUFFIX)
7052         {
7053           if (intel_syntax
7054               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7055               && i.tm.opcode_modifier.no_lsuf
7056               && !i.tm.opcode_modifier.todword
7057               && !i.tm.opcode_modifier.toqword)
7058             i.suffix = 0;
7059           else if (!check_long_reg ())
7060             return 0;
7061         }
7062       else if (i.suffix == QWORD_MNEM_SUFFIX)
7063         {
7064           if (intel_syntax
7065               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7066               && i.tm.opcode_modifier.no_qsuf
7067               && !i.tm.opcode_modifier.todword
7068               && !i.tm.opcode_modifier.toqword)
7069             i.suffix = 0;
7070           else if (!check_qword_reg ())
7071             return 0;
7072         }
7073       else if (i.suffix == WORD_MNEM_SUFFIX)
7074         {
7075           if (intel_syntax
7076               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
7077               && i.tm.opcode_modifier.no_wsuf)
7078             i.suffix = 0;
7079           else if (!check_word_reg ())
7080             return 0;
7081         }
7082       else if (intel_syntax
7083                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7084         /* Do nothing if the instruction is going to ignore the prefix.  */
7085         ;
7086       else
7087         abort ();
7088
7089       /* Undo the movsx/movzx change done above.  */
7090       i.operands = numop;
7091     }
7092   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7093            && !i.suffix)
7094     {
7095       i.suffix = stackop_size;
7096       if (stackop_size == LONG_MNEM_SUFFIX)
7097         {
7098           /* stackop_size is set to LONG_MNEM_SUFFIX for the
7099              .code16gcc directive to support 16-bit mode with
7100              32-bit address.  For IRET without a suffix, generate
7101              16-bit IRET (opcode 0xcf) to return from an interrupt
7102              handler.  */
7103           if (i.tm.base_opcode == 0xcf)
7104             {
7105               i.suffix = WORD_MNEM_SUFFIX;
7106               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7107             }
7108           /* Warn about changed behavior for segment register push/pop.  */
7109           else if ((i.tm.base_opcode | 1) == 0x07)
7110             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7111                      i.tm.name);
7112         }
7113     }
7114   else if (!i.suffix
7115            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7116                || i.tm.opcode_modifier.jump == JUMP_BYTE
7117                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7118                || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7119                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7120                    && i.tm.extension_opcode <= 3)))
7121     {
7122       switch (flag_code)
7123         {
7124         case CODE_64BIT:
7125           if (!i.tm.opcode_modifier.no_qsuf)
7126             {
7127               if (i.tm.opcode_modifier.jump == JUMP_BYTE
7128                   || i.tm.opcode_modifier.no_lsuf)
7129                 i.suffix = QWORD_MNEM_SUFFIX;
7130               break;
7131             }
7132           /* Fall through.  */
7133         case CODE_32BIT:
7134           if (!i.tm.opcode_modifier.no_lsuf)
7135             i.suffix = LONG_MNEM_SUFFIX;
7136           break;
7137         case CODE_16BIT:
7138           if (!i.tm.opcode_modifier.no_wsuf)
7139             i.suffix = WORD_MNEM_SUFFIX;
7140           break;
7141         }
7142     }
7143
7144   if (!i.suffix
7145       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7146           /* Also cover lret/retf/iret in 64-bit mode.  */
7147           || (flag_code == CODE_64BIT
7148               && !i.tm.opcode_modifier.no_lsuf
7149               && !i.tm.opcode_modifier.no_qsuf))
7150       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7151       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
7152       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7153       /* Accept FLDENV et al without suffix.  */
7154       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7155     {
7156       unsigned int suffixes, evex = 0;
7157
7158       suffixes = !i.tm.opcode_modifier.no_bsuf;
7159       if (!i.tm.opcode_modifier.no_wsuf)
7160         suffixes |= 1 << 1;
7161       if (!i.tm.opcode_modifier.no_lsuf)
7162         suffixes |= 1 << 2;
7163       if (!i.tm.opcode_modifier.no_ldsuf)
7164         suffixes |= 1 << 3;
7165       if (!i.tm.opcode_modifier.no_ssuf)
7166         suffixes |= 1 << 4;
7167       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7168         suffixes |= 1 << 5;
7169
7170       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
7171          also suitable for AT&T syntax mode, it was requested that this be
7172          restricted to just Intel syntax.  */
7173       if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast.type)
7174         {
7175           unsigned int op;
7176
7177           for (op = 0; op < i.tm.operands; ++op)
7178             {
7179               if (is_evex_encoding (&i.tm)
7180                   && !cpu_arch_flags.bitfield.cpuavx512vl)
7181                 {
7182                   if (i.tm.operand_types[op].bitfield.ymmword)
7183                     i.tm.operand_types[op].bitfield.xmmword = 0;
7184                   if (i.tm.operand_types[op].bitfield.zmmword)
7185                     i.tm.operand_types[op].bitfield.ymmword = 0;
7186                   if (!i.tm.opcode_modifier.evex
7187                       || i.tm.opcode_modifier.evex == EVEXDYN)
7188                     i.tm.opcode_modifier.evex = EVEX512;
7189                 }
7190
7191               if (i.tm.operand_types[op].bitfield.xmmword
7192                   + i.tm.operand_types[op].bitfield.ymmword
7193                   + i.tm.operand_types[op].bitfield.zmmword < 2)
7194                 continue;
7195
7196               /* Any properly sized operand disambiguates the insn.  */
7197               if (i.types[op].bitfield.xmmword
7198                   || i.types[op].bitfield.ymmword
7199                   || i.types[op].bitfield.zmmword)
7200                 {
7201                   suffixes &= ~(7 << 6);
7202                   evex = 0;
7203                   break;
7204                 }
7205
7206               if ((i.flags[op] & Operand_Mem)
7207                   && i.tm.operand_types[op].bitfield.unspecified)
7208                 {
7209                   if (i.tm.operand_types[op].bitfield.xmmword)
7210                     suffixes |= 1 << 6;
7211                   if (i.tm.operand_types[op].bitfield.ymmword)
7212                     suffixes |= 1 << 7;
7213                   if (i.tm.operand_types[op].bitfield.zmmword)
7214                     suffixes |= 1 << 8;
7215                   if (is_evex_encoding (&i.tm))
7216                     evex = EVEX512;
7217                 }
7218             }
7219         }
7220
7221       /* Are multiple suffixes / operand sizes allowed?  */
7222       if (suffixes & (suffixes - 1))
7223         {
7224           if (intel_syntax
7225               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7226                   || operand_check == check_error))
7227             {
7228               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7229               return 0;
7230             }
7231           if (operand_check == check_error)
7232             {
7233               as_bad (_("no instruction mnemonic suffix given and "
7234                         "no register operands; can't size `%s'"), i.tm.name);
7235               return 0;
7236             }
7237           if (operand_check == check_warning)
7238             as_warn (_("%s; using default for `%s'"),
7239                        intel_syntax
7240                        ? _("ambiguous operand size")
7241                        : _("no instruction mnemonic suffix given and "
7242                            "no register operands"),
7243                        i.tm.name);
7244
7245           if (i.tm.opcode_modifier.floatmf)
7246             i.suffix = SHORT_MNEM_SUFFIX;
7247           else if (is_movx)
7248             /* handled below */;
7249           else if (evex)
7250             i.tm.opcode_modifier.evex = evex;
7251           else if (flag_code == CODE_16BIT)
7252             i.suffix = WORD_MNEM_SUFFIX;
7253           else if (!i.tm.opcode_modifier.no_lsuf)
7254             i.suffix = LONG_MNEM_SUFFIX;
7255           else
7256             i.suffix = QWORD_MNEM_SUFFIX;
7257         }
7258     }
7259
7260   if (is_movx)
7261     {
7262       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7263          In AT&T syntax, if there is no suffix (warned about above), the default
7264          will be byte extension.  */
7265       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7266         i.tm.base_opcode |= 1;
7267
7268       /* For further processing, the suffix should represent the destination
7269          (register).  This is already the case when one was used with
7270          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7271          no suffix to begin with.  */
7272       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7273         {
7274           if (i.types[1].bitfield.word)
7275             i.suffix = WORD_MNEM_SUFFIX;
7276           else if (i.types[1].bitfield.qword)
7277             i.suffix = QWORD_MNEM_SUFFIX;
7278           else
7279             i.suffix = LONG_MNEM_SUFFIX;
7280
7281           i.tm.opcode_modifier.w = 0;
7282         }
7283     }
7284
7285   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7286     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7287                    != (i.tm.operand_types[1].bitfield.class == Reg);
7288
7289   /* Change the opcode based on the operand size given by i.suffix.  */
7290   switch (i.suffix)
7291     {
7292     /* Size floating point instruction.  */
7293     case LONG_MNEM_SUFFIX:
7294       if (i.tm.opcode_modifier.floatmf)
7295         {
7296           i.tm.base_opcode ^= 4;
7297           break;
7298         }
7299     /* fall through */
7300     case WORD_MNEM_SUFFIX:
7301     case QWORD_MNEM_SUFFIX:
7302       /* It's not a byte, select word/dword operation.  */
7303       if (i.tm.opcode_modifier.w)
7304         {
7305           if (i.short_form)
7306             i.tm.base_opcode |= 8;
7307           else
7308             i.tm.base_opcode |= 1;
7309         }
7310     /* fall through */
7311     case SHORT_MNEM_SUFFIX:
7312       /* Now select between word & dword operations via the operand
7313          size prefix, except for instructions that will ignore this
7314          prefix anyway.  */
7315       if (i.suffix != QWORD_MNEM_SUFFIX
7316           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7317           && !i.tm.opcode_modifier.floatmf
7318           && !is_any_vex_encoding (&i.tm)
7319           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7320               || (flag_code == CODE_64BIT
7321                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7322         {
7323           unsigned int prefix = DATA_PREFIX_OPCODE;
7324
7325           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7326             prefix = ADDR_PREFIX_OPCODE;
7327
7328           if (!add_prefix (prefix))
7329             return 0;
7330         }
7331
7332       /* Set mode64 for an operand.  */
7333       if (i.suffix == QWORD_MNEM_SUFFIX
7334           && flag_code == CODE_64BIT
7335           && !i.tm.opcode_modifier.norex64
7336           && !i.tm.opcode_modifier.vexw
7337           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7338              need rex64. */
7339           && ! (i.operands == 2
7340                 && i.tm.base_opcode == 0x90
7341                 && i.tm.extension_opcode == None
7342                 && i.types[0].bitfield.instance == Accum
7343                 && i.types[0].bitfield.qword
7344                 && i.types[1].bitfield.instance == Accum
7345                 && i.types[1].bitfield.qword))
7346         i.rex |= REX_W;
7347
7348       break;
7349
7350     case 0:
7351       /* Select word/dword/qword operation with explicit data sizing prefix
7352          when there are no suitable register operands.  */
7353       if (i.tm.opcode_modifier.w
7354           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7355           && (!i.reg_operands
7356               || (i.reg_operands == 1
7357                       /* ShiftCount */
7358                   && (i.tm.operand_types[0].bitfield.instance == RegC
7359                       /* InOutPortReg */
7360                       || i.tm.operand_types[0].bitfield.instance == RegD
7361                       || i.tm.operand_types[1].bitfield.instance == RegD
7362                       /* CRC32 */
7363                       || is_crc32))))
7364         i.tm.base_opcode |= 1;
7365       break;
7366     }
7367
7368   if (i.tm.opcode_modifier.addrprefixopreg)
7369     {
7370       gas_assert (!i.suffix);
7371       gas_assert (i.reg_operands);
7372
7373       if (i.tm.operand_types[0].bitfield.instance == Accum
7374           || i.operands == 1)
7375         {
7376           /* The address size override prefix changes the size of the
7377              first operand.  */
7378           if (flag_code == CODE_64BIT
7379               && i.op[0].regs->reg_type.bitfield.word)
7380             {
7381               as_bad (_("16-bit addressing unavailable for `%s'"),
7382                       i.tm.name);
7383               return 0;
7384             }
7385
7386           if ((flag_code == CODE_32BIT
7387                ? i.op[0].regs->reg_type.bitfield.word
7388                : i.op[0].regs->reg_type.bitfield.dword)
7389               && !add_prefix (ADDR_PREFIX_OPCODE))
7390             return 0;
7391         }
7392       else
7393         {
7394           /* Check invalid register operand when the address size override
7395              prefix changes the size of register operands.  */
7396           unsigned int op;
7397           enum { need_word, need_dword, need_qword } need;
7398
7399           /* Check the register operand for the address size prefix if
7400              the memory operand has no real registers, like symbol, DISP
7401              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7402           if (i.mem_operands == 1
7403               && i.reg_operands == 1
7404               && i.operands == 2
7405               && i.types[1].bitfield.class == Reg
7406               && (flag_code == CODE_32BIT
7407                   ? i.op[1].regs->reg_type.bitfield.word
7408                   : i.op[1].regs->reg_type.bitfield.dword)
7409               && ((i.base_reg == NULL && i.index_reg == NULL)
7410 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7411                   || (x86_elf_abi == X86_64_X32_ABI
7412                       && i.base_reg
7413                       && i.base_reg->reg_num == RegIP
7414                       && i.base_reg->reg_type.bitfield.qword))
7415 #else
7416                   || 0)
7417 #endif
7418               && !add_prefix (ADDR_PREFIX_OPCODE))
7419             return 0;
7420
7421           if (flag_code == CODE_32BIT)
7422             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7423           else if (i.prefix[ADDR_PREFIX])
7424             need = need_dword;
7425           else
7426             need = flag_code == CODE_64BIT ? need_qword : need_word;
7427
7428           for (op = 0; op < i.operands; op++)
7429             {
7430               if (i.types[op].bitfield.class != Reg)
7431                 continue;
7432
7433               switch (need)
7434                 {
7435                 case need_word:
7436                   if (i.op[op].regs->reg_type.bitfield.word)
7437                     continue;
7438                   break;
7439                 case need_dword:
7440                   if (i.op[op].regs->reg_type.bitfield.dword)
7441                     continue;
7442                   break;
7443                 case need_qword:
7444                   if (i.op[op].regs->reg_type.bitfield.qword)
7445                     continue;
7446                   break;
7447                 }
7448
7449               as_bad (_("invalid register operand size for `%s'"),
7450                       i.tm.name);
7451               return 0;
7452             }
7453         }
7454     }
7455
7456   return 1;
7457 }
7458
7459 static int
7460 check_byte_reg (void)
7461 {
       /* Check the register operands of the current insn (global `i')
          against a byte mnemonic suffix.  Returns 1 when every register
          operand is acceptable; otherwise reports the first offending
          register through as_bad and returns 0.  */
7462   int op;
7463
       /* Operands are visited from the last one down to operand 0.  */
7464   for (op = i.operands; --op >= 0;)
7465     {
7466       /* Skip non-register operands. */
7467       if (i.types[op].bitfield.class != Reg)
7468         continue;
7469
7470       /* An eight bit register is OK.  NOTE(review): only the byte bit
7471          is tested here, so the old remark that 16/32 bit versions of
7472          an eight bit register are accepted looks stale -- confirm.  */
7473       if (i.types[op].bitfield.byte)
7474         continue;
7475
7476       /* I/O port address operands (word-sized RegD in the template) are OK.  */
7477       if (i.tm.operand_types[op].bitfield.instance == RegD
7478           && i.tm.operand_types[op].bitfield.word)
7479         continue;
7480
7481       /* crc32 only wants its source operand (operand 0) checked here.  */
7482       if (i.tm.base_opcode == 0xf0
7483           && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7484           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7485           && op != 0)
7486         continue;
7487
7488       /* Any other register is bad.  */
7489       as_bad (_("`%s%s' not allowed with `%s%c'"),
7490               register_prefix, i.op[op].regs->reg_name,
7491               i.tm.name, i.suffix);
7492       return 0;
7493     }
7494   return 1;
7495 }
7496
7497 static int
7498 check_long_reg (void)
7499 {
       /* Check the register operands of the current insn (global `i')
          against a long mnemonic suffix.  Returns 1 when no register
          operand is rejected, 0 after reporting an error via as_bad.
          As a side effect i.suffix may be changed to QWORD_MNEM_SUFFIX
          (see the last case below); the loop then simply carries on with
          the remaining operands.  */
7500   int op;
7501
       /* Operands are visited from the last one down to operand 0.  */
7502   for (op = i.operands; --op >= 0;)
7503     /* Skip non-register operands. */
7504     if (i.types[op].bitfield.class != Reg)
7505       continue;
7506     /* Reject eight bit registers, except where the template requires
7507        them. (eg. movzb)  */
7508     else if (i.types[op].bitfield.byte
7509              && (i.tm.operand_types[op].bitfield.class == Reg
7510                  || i.tm.operand_types[op].bitfield.instance == Accum)
7511              && (i.tm.operand_types[op].bitfield.word
7512                  || i.tm.operand_types[op].bitfield.dword))
7513       {
7514         as_bad (_("`%s%s' not allowed with `%s%c'"),
7515                 register_prefix,
7516                 i.op[op].regs->reg_name,
7517                 i.tm.name,
7518                 i.suffix);
7519         return 0;
7520       }
7521     /* Error if the e prefix on a general reg is missing.  */
7522     else if (i.types[op].bitfield.word
7523              && (i.tm.operand_types[op].bitfield.class == Reg
7524                  || i.tm.operand_types[op].bitfield.instance == Accum)
7525              && i.tm.operand_types[op].bitfield.dword)
7526       {
7527         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7528                 register_prefix, i.op[op].regs->reg_name,
7529                 i.suffix);
7530         return 0;
7531       }
7532     /* The r prefix on a general reg is present: convert or reject.  */
7533     else if (i.types[op].bitfield.qword
7534              && (i.tm.operand_types[op].bitfield.class == Reg
7535                  || i.tm.operand_types[op].bitfield.instance == Accum)
7536              && i.tm.operand_types[op].bitfield.dword)
7537       {
             /* Only Intel syntax templates with the toqword modifier, whose
                first operand is not a SIMD register, get promoted.  */
7538         if (intel_syntax
7539             && i.tm.opcode_modifier.toqword
7540             && i.types[0].bitfield.class != RegSIMD)
7541           {
7542             /* Convert to QWORD.  We want REX byte. */
7543             i.suffix = QWORD_MNEM_SUFFIX;
7544           }
7545         else
7546           {
7547             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7548                     register_prefix, i.op[op].regs->reg_name,
7549                     i.suffix);
7550             return 0;
7551           }
7552       }
7553   return 1;
7554 }
7555
7556 static int
7557 check_qword_reg (void)
7558 {
       /* Check the register operands of the current insn (global `i')
          against a qword mnemonic suffix.  Returns 1 when no register
          operand is rejected, 0 after reporting an error via as_bad.
          As a side effect i.suffix may be changed to LONG_MNEM_SUFFIX
          (see the last case below); the loop then simply carries on with
          the remaining operands.  */
7559   int op;
7560
       /* Operands are visited from the last one down to operand 0.  */
7561   for (op = i.operands; --op >= 0; )
7562     /* Skip non-register operands. */
7563     if (i.types[op].bitfield.class != Reg)
7564       continue;
7565     /* Reject eight bit registers, except where the template requires
7566        them. (eg. movzb)  */
7567     else if (i.types[op].bitfield.byte
7568              && (i.tm.operand_types[op].bitfield.class == Reg
7569                  || i.tm.operand_types[op].bitfield.instance == Accum)
7570              && (i.tm.operand_types[op].bitfield.word
7571                  || i.tm.operand_types[op].bitfield.dword))
7572       {
7573         as_bad (_("`%s%s' not allowed with `%s%c'"),
7574                 register_prefix,
7575                 i.op[op].regs->reg_name,
7576                 i.tm.name,
7577                 i.suffix);
7578         return 0;
7579       }
7580     /* The r prefix on a general reg is missing: convert or reject.  */
7581     else if ((i.types[op].bitfield.word
7582               || i.types[op].bitfield.dword)
7583              && (i.tm.operand_types[op].bitfield.class == Reg
7584                  || i.tm.operand_types[op].bitfield.instance == Accum)
7585              && i.tm.operand_types[op].bitfield.qword)
7586       {
7587         /* Only Intel syntax templates with the todword modifier, whose
7588            first operand is not a SIMD register, are lowered to DWORD.  */
7589         if (intel_syntax
7590             && i.tm.opcode_modifier.todword
7591             && i.types[0].bitfield.class != RegSIMD)
7592           {
7593             /* Convert to DWORD.  We don't want REX byte. */
7594             i.suffix = LONG_MNEM_SUFFIX;
7595           }
7596         else
7597           {
7598             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7599                     register_prefix, i.op[op].regs->reg_name,
7600                     i.suffix);
7601             return 0;
7602           }
7603       }
7604   return 1;
7605 }
7606
7607 static int
7608 check_word_reg (void)
7609 {
       /* Check the register operands of the current insn (global `i')
          against a word mnemonic suffix.  Returns 1 when no register
          operand is rejected, 0 after reporting an error via as_bad.
          Operands are visited from the last one down to operand 0.  */
7610   int op;
7611   for (op = i.operands; --op >= 0;)
7612     /* Skip non-register operands. */
7613     if (i.types[op].bitfield.class != Reg)
7614       continue;
7615     /* Reject eight bit registers, except where the template requires
7616        them. (eg. movzb)  */
7617     else if (i.types[op].bitfield.byte
7618              && (i.tm.operand_types[op].bitfield.class == Reg
7619                  || i.tm.operand_types[op].bitfield.instance == Accum)
7620              && (i.tm.operand_types[op].bitfield.word
7621                  || i.tm.operand_types[op].bitfield.dword))
7622       {
7623         as_bad (_("`%s%s' not allowed with `%s%c'"),
7624                 register_prefix,
7625                 i.op[op].regs->reg_name,
7626                 i.tm.name,
7627                 i.suffix);
7628         return 0;
7629       }
7630     /* Error if the e or r prefix on a general reg is present.  */
7631     else if ((i.types[op].bitfield.dword
7632                  || i.types[op].bitfield.qword)
7633              && (i.tm.operand_types[op].bitfield.class == Reg
7634                  || i.tm.operand_types[op].bitfield.instance == Accum)
7635              && i.tm.operand_types[op].bitfield.word)
7636       {
7637         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7638                 register_prefix, i.op[op].regs->reg_name,
7639                 i.suffix);
7640         return 0;
7641       }
7642   return 1;
7643 }
7644
7645 static int
7646 update_imm (unsigned int j)
7647 {
       /* Narrow the immediate type recorded for operand J of the current
          insn (global `i') down to exactly one immediate size, using the
          mnemonic suffix or, without one, the code size and the explicit
          sizing prefixes.  Returns 1 on success, with the result stored
          back in i.types[j]; returns 0 after reporting via as_bad that
          the immediate size cannot be determined.  */
7648   i386_operand_type overlap = i.types[j];
       /* Only act when some immediate bit is set and the type is not
          already exactly one of the single-size immediate types.  */
7649   if ((overlap.bitfield.imm8
7650        || overlap.bitfield.imm8s
7651        || overlap.bitfield.imm16
7652        || overlap.bitfield.imm32
7653        || overlap.bitfield.imm32s
7654        || overlap.bitfield.imm64)
7655       && !operand_type_equal (&overlap, &imm8)
7656       && !operand_type_equal (&overlap, &imm8s)
7657       && !operand_type_equal (&overlap, &imm16)
7658       && !operand_type_equal (&overlap, &imm32)
7659       && !operand_type_equal (&overlap, &imm32s)
7660       && !operand_type_equal (&overlap, &imm64))
7661     {
7662       if (i.suffix)
7663         {
7664           i386_operand_type temp;
7665
               /* Rebuild the type in TEMP (initialised through
                  operand_type_set with value 0), copying over only the
                  immediate bits that go with the suffix: byte keeps
                  imm8/imm8s, word keeps imm16, qword keeps imm64/imm32s,
                  any other suffix keeps imm32.  */
7666           operand_type_set (&temp, 0);
7667           if (i.suffix == BYTE_MNEM_SUFFIX)
7668             {
7669               temp.bitfield.imm8 = overlap.bitfield.imm8;
7670               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7671             }
7672           else if (i.suffix == WORD_MNEM_SUFFIX)
7673             temp.bitfield.imm16 = overlap.bitfield.imm16;
7674           else if (i.suffix == QWORD_MNEM_SUFFIX)
7675             {
7676               temp.bitfield.imm64 = overlap.bitfield.imm64;
7677               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7678             }
7679           else
7680             temp.bitfield.imm32 = overlap.bitfield.imm32;
7681           overlap = temp;
7682         }
           /* No suffix, and the type equals one of imm16_32_32s, imm16_32
              or imm16_32s: pick imm16 when exactly one of "16-bit code"
              and "data size prefix present" holds, imm32s otherwise.  */
7683       else if (operand_type_equal (&overlap, &imm16_32_32s)
7684                || operand_type_equal (&overlap, &imm16_32)
7685                || operand_type_equal (&overlap, &imm16_32s))
7686         {
7687           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7688             overlap = imm16;
7689           else
7690             overlap = imm32s;
7691         }
           /* An explicit REX.W prefix restricts the candidates with
              imm32s.  */
7692       else if (i.prefix[REX_PREFIX] & REX_W)
7693         overlap = operand_type_and (overlap, imm32s);
           /* An explicit data size prefix restricts the candidates with
              imm16 outside 16-bit code, and with imm32 in 16-bit code.  */
7694       else if (i.prefix[DATA_PREFIX])
7695         overlap = operand_type_and (overlap,
7696                                     flag_code != CODE_16BIT ? imm16 : imm32);
           /* Give up if the result still is not exactly one single-size
              immediate type.  */
7697       if (!operand_type_equal (&overlap, &imm8)
7698           && !operand_type_equal (&overlap, &imm8s)
7699           && !operand_type_equal (&overlap, &imm16)
7700           && !operand_type_equal (&overlap, &imm32)
7701           && !operand_type_equal (&overlap, &imm32s)
7702           && !operand_type_equal (&overlap, &imm64))
7703         {
7704           as_bad (_("no instruction mnemonic suffix given; "
7705                     "can't determine immediate size"));
7706           return 0;
7707         }
7708     }
       /* Store the (possibly unchanged) type back.  */
7709   i.types[j] = overlap;
7710
7711   return 1;
7712 }
7713
7714 static int
7715 finalize_imm (void)
7716 {
       /* Run update_imm on the first two operands of the current insn
          (global `i'), or on the only operand when there is just one.
          Returns 0 as soon as update_imm fails, 1 otherwise.  */
7717   unsigned int j, n;
7718
7719   /* Update at most the first 2 operands; N is 0 when there are none.  */
7720   n = i.operands > 2 ? 2 : i.operands;
7721   if (n)
7722     {
7723       for (j = 0; j < n; j++)
7724         if (update_imm (j) == 0)
7725           return 0;
7726
7727       /* The 3rd operand can't be an immediate operand.  */
7728       gas_assert (operand_type_check (i.types[2], imm) == 0);
7729     }
7730
7731   return 1;
7732 }
7733
7734 static int
7735 process_operands (void)
7736 {
7737   /* Default segment register this instruction will use for memory
7738      accesses.  0 means unknown.  This is only for optimizing out
7739      unnecessary segment overrides.  */
7740   const reg_entry *default_seg = NULL;
7741
7742   if (i.tm.opcode_modifier.sse2avx)
7743     {
7744       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7745          need converting.  */
7746       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7747       i.prefix[REX_PREFIX] = 0;
7748       i.rex_encoding = 0;
7749     }
7750   /* ImmExt should be processed after SSE2AVX.  */
7751   else if (i.tm.opcode_modifier.immext)
7752     process_immext ();
7753
7754   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7755     {
7756       unsigned int dupl = i.operands;
7757       unsigned int dest = dupl - 1;
7758       unsigned int j;
7759
7760       /* The destination must be an xmm register.  */
7761       gas_assert (i.reg_operands
7762                   && MAX_OPERANDS > dupl
7763                   && operand_type_equal (&i.types[dest], &regxmm));
7764
7765       if (i.tm.operand_types[0].bitfield.instance == Accum
7766           && i.tm.operand_types[0].bitfield.xmmword)
7767         {
7768           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7769             {
7770               /* Keep xmm0 for instructions with VEX prefix and 3
7771                  sources.  */
7772               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7773               i.tm.operand_types[0].bitfield.class = RegSIMD;
7774               goto duplicate;
7775             }
7776           else
7777             {
7778               /* We remove the first xmm0 and keep the number of
7779                  operands unchanged, which in fact duplicates the
7780                  destination.  */
7781               for (j = 1; j < i.operands; j++)
7782                 {
7783                   i.op[j - 1] = i.op[j];
7784                   i.types[j - 1] = i.types[j];
7785                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7786                   i.flags[j - 1] = i.flags[j];
7787                 }
7788             }
7789         }
7790       else if (i.tm.opcode_modifier.implicit1stxmm0)
7791         {
7792           gas_assert ((MAX_OPERANDS - 1) > dupl
7793                       && (i.tm.opcode_modifier.vexsources
7794                           == VEX3SOURCES));
7795
7796           /* Add the implicit xmm0 for instructions with VEX prefix
7797              and 3 sources.  */
7798           for (j = i.operands; j > 0; j--)
7799             {
7800               i.op[j] = i.op[j - 1];
7801               i.types[j] = i.types[j - 1];
7802               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7803               i.flags[j] = i.flags[j - 1];
7804             }
7805           i.op[0].regs
7806             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7807           i.types[0] = regxmm;
7808           i.tm.operand_types[0] = regxmm;
7809
7810           i.operands += 2;
7811           i.reg_operands += 2;
7812           i.tm.operands += 2;
7813
7814           dupl++;
7815           dest++;
7816           i.op[dupl] = i.op[dest];
7817           i.types[dupl] = i.types[dest];
7818           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7819           i.flags[dupl] = i.flags[dest];
7820         }
7821       else
7822         {
7823         duplicate:
7824           i.operands++;
7825           i.reg_operands++;
7826           i.tm.operands++;
7827
7828           i.op[dupl] = i.op[dest];
7829           i.types[dupl] = i.types[dest];
7830           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7831           i.flags[dupl] = i.flags[dest];
7832         }
7833
7834        if (i.tm.opcode_modifier.immext)
7835          process_immext ();
7836     }
7837   else if (i.tm.operand_types[0].bitfield.instance == Accum
7838            && i.tm.operand_types[0].bitfield.xmmword)
7839     {
7840       unsigned int j;
7841
7842       for (j = 1; j < i.operands; j++)
7843         {
7844           i.op[j - 1] = i.op[j];
7845           i.types[j - 1] = i.types[j];
7846
7847           /* We need to adjust fields in i.tm since they are used by
7848              build_modrm_byte.  */
7849           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7850
7851           i.flags[j - 1] = i.flags[j];
7852         }
7853
7854       i.operands--;
7855       i.reg_operands--;
7856       i.tm.operands--;
7857     }
7858   else if (i.tm.opcode_modifier.implicitquadgroup)
7859     {
7860       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7861
7862       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7863       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7864       regnum = register_number (i.op[1].regs);
7865       first_reg_in_group = regnum & ~3;
7866       last_reg_in_group = first_reg_in_group + 3;
7867       if (regnum != first_reg_in_group)
7868         as_warn (_("source register `%s%s' implicitly denotes"
7869                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7870                  register_prefix, i.op[1].regs->reg_name,
7871                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7872                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7873                  i.tm.name);
7874     }
7875   else if (i.tm.opcode_modifier.regkludge)
7876     {
7877       /* The imul $imm, %reg instruction is converted into
7878          imul $imm, %reg, %reg, and the clr %reg instruction
7879          is converted into xor %reg, %reg.  */
7880
7881       unsigned int first_reg_op;
7882
7883       if (operand_type_check (i.types[0], reg))
7884         first_reg_op = 0;
7885       else
7886         first_reg_op = 1;
7887       /* Pretend we saw the extra register operand.  */
7888       gas_assert (i.reg_operands == 1
7889                   && i.op[first_reg_op + 1].regs == 0);
7890       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7891       i.types[first_reg_op + 1] = i.types[first_reg_op];
7892       i.operands++;
7893       i.reg_operands++;
7894     }
7895
7896   if (i.tm.opcode_modifier.modrm)
7897     {
7898       /* The opcode is completed (modulo i.tm.extension_opcode which
7899          must be put into the modrm byte).  Now, we make the modrm and
7900          index base bytes based on all the info we've collected.  */
7901
7902       default_seg = build_modrm_byte ();
7903     }
7904   else if (i.types[0].bitfield.class == SReg)
7905     {
7906       if (flag_code != CODE_64BIT
7907           ? i.tm.base_opcode == POP_SEG_SHORT
7908             && i.op[0].regs->reg_num == 1
7909           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
7910             && i.op[0].regs->reg_num < 4)
7911         {
7912           as_bad (_("you can't `%s %s%s'"),
7913                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7914           return 0;
7915         }
7916       if (i.op[0].regs->reg_num > 3
7917           && i.tm.opcode_modifier.opcodespace == SPACE_BASE )
7918         {
7919           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
7920           i.tm.opcode_modifier.opcodespace = SPACE_0F;
7921         }
7922       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7923     }
7924   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7925            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
7926     {
7927       default_seg = reg_ds;
7928     }
7929   else if (i.tm.opcode_modifier.isstring)
7930     {
7931       /* For the string instructions that allow a segment override
7932          on one of their operands, the default segment is ds.  */
7933       default_seg = reg_ds;
7934     }
7935   else if (i.short_form)
7936     {
7937       /* The register or float register operand is in operand
7938          0 or 1.  */
7939       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7940
7941       /* Register goes in low 3 bits of opcode.  */
7942       i.tm.base_opcode |= i.op[op].regs->reg_num;
7943       if ((i.op[op].regs->reg_flags & RegRex) != 0)
7944         i.rex |= REX_B;
7945       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7946         {
7947           /* Warn about some common errors, but press on regardless.
7948              The first case can be generated by gcc (<= 2.8.1).  */
7949           if (i.operands == 2)
7950             {
7951               /* Reversed arguments on faddp, fsubp, etc.  */
7952               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7953                        register_prefix, i.op[!intel_syntax].regs->reg_name,
7954                        register_prefix, i.op[intel_syntax].regs->reg_name);
7955             }
7956           else
7957             {
7958               /* Extraneous `l' suffix on fp insn.  */
7959               as_warn (_("translating to `%s %s%s'"), i.tm.name,
7960                        register_prefix, i.op[0].regs->reg_name);
7961             }
7962         }
7963     }
7964
7965   if ((i.seg[0] || i.prefix[SEG_PREFIX])
7966       && i.tm.base_opcode == 0x8d /* lea */
7967       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
7968       && !is_any_vex_encoding(&i.tm))
7969     {
7970       if (!quiet_warnings)
7971         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7972       if (optimize)
7973         {
7974           i.seg[0] = NULL;
7975           i.prefix[SEG_PREFIX] = 0;
7976         }
7977     }
7978
7979   /* If a segment was explicitly specified, and the specified segment
7980      is neither the default nor the one already recorded from a prefix,
7981      use an opcode prefix to select it.  If we never figured out what
7982      the default segment is, then default_seg will be zero at this
7983      point, and the specified segment prefix will always be used.  */
7984   if (i.seg[0]
7985       && i.seg[0] != default_seg
7986       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
7987     {
7988       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
7989         return 0;
7990     }
7991   return 1;
7992 }
7993
7994 /* Apply R's RegRex/RegVRex flags to i.rex/i.vrex as REX_BIT; as_bad if RegRex
7995    finds REX_BIT already in i.rex.  With DO_SSE2AVX, if R lacks RegRex but
7996    REX_BIT is set, a non-null i.vex.register_specifier (== R) advances by 8.  */
7997 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7998                                  bool do_sse2avx)
7999 {
8000   if ((r->reg_flags & RegRex) != 0 && (i.rex & rex_bit) != 0)
8001     as_bad (_("same type of prefix used twice"));
8002   if ((r->reg_flags & RegRex) != 0)
8003     i.rex |= rex_bit;
8004   else if (do_sse2avx && (i.rex & rex_bit) != 0 && i.vex.register_specifier)
8005     {
8006       gas_assert (i.vex.register_specifier == r);
8007       i.vex.register_specifier += 8;
8008     }
8009   if ((r->reg_flags & RegVRex) != 0)
8010     i.vrex |= rex_bit;
8011 }
8012
8013 static const reg_entry *
8014 build_modrm_byte (void)
8015 {
8016   const reg_entry *default_seg = NULL;
8017   unsigned int source, dest;
8018   int vex_3_sources;
8019
8020   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8021   if (vex_3_sources)
8022     {
8023       unsigned int nds, reg_slot;
8024       expressionS *exp;
8025
8026       dest = i.operands - 1;
8027       nds = dest - 1;
8028
8029       /* There are 2 kinds of instructions:
8030          1. 5 operands: 4 register operands or 3 register operands
8031          plus 1 memory operand plus one Imm4 operand, VexXDS, and
8032          VexW0 or VexW1.  The destination must be either XMM, YMM or
8033          ZMM register.
8034          2. 4 operands: 4 register operands or 3 register operands
8035          plus 1 memory operand, with VexXDS.  */
8036       gas_assert ((i.reg_operands == 4
8037                    || (i.reg_operands == 3 && i.mem_operands == 1))
8038                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
8039                   && i.tm.opcode_modifier.vexw
8040                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8041
8042       /* If VexW1 is set, the first non-immediate operand is the source and
8043          the second non-immediate one is encoded in the immediate operand.  */
8044       if (i.tm.opcode_modifier.vexw == VEXW1)
8045         {
8046           source = i.imm_operands;
8047           reg_slot = i.imm_operands + 1;
8048         }
8049       else
8050         {
8051           source = i.imm_operands + 1;
8052           reg_slot = i.imm_operands;
8053         }
8054
8055       if (i.imm_operands == 0)
8056         {
8057           /* When there is no immediate operand, generate an 8bit
8058              immediate operand to encode the first operand.  */
8059           exp = &im_expressions[i.imm_operands++];
8060           i.op[i.operands].imms = exp;
8061           i.types[i.operands] = imm8;
8062           i.operands++;
8063
8064           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8065           exp->X_op = O_constant;
8066           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8067           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8068         }
8069       else
8070         {
8071           gas_assert (i.imm_operands == 1);
8072           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8073           gas_assert (!i.tm.opcode_modifier.immext);
8074
8075           /* Turn on Imm8 again so that output_imm will generate it.  */
8076           i.types[0].bitfield.imm8 = 1;
8077
8078           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8079           i.op[0].imms->X_add_number
8080               |= register_number (i.op[reg_slot].regs) << 4;
8081           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8082         }
8083
8084       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8085       i.vex.register_specifier = i.op[nds].regs;
8086     }
8087   else
8088     source = dest = 0;
8089
8090   /* i.reg_operands MUST be the number of real register operands;
8091      implicit registers do not count.  If there are 3 register
8092      operands, it must be an instruction with VexNDS.  For an
8093      instruction with VexNDD, the destination register is encoded
8094      in VEX prefix.  If there are 4 register operands, it must be
8095      an instruction with VEX prefix and 3 sources.  */
8096   if (i.mem_operands == 0
8097       && ((i.reg_operands == 2
8098            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8099           || (i.reg_operands == 3
8100               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8101           || (i.reg_operands == 4 && vex_3_sources)))
8102     {
8103       switch (i.operands)
8104         {
8105         case 2:
8106           source = 0;
8107           break;
8108         case 3:
8109           /* When there are 3 operands, one of them may be immediate,
8110              which may be the first or the last operand.  Otherwise,
8111              the first operand must be shift count register (cl) or it
8112              is an instruction with VexNDS. */
8113           gas_assert (i.imm_operands == 1
8114                       || (i.imm_operands == 0
8115                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8116                               || (i.types[0].bitfield.instance == RegC
8117                                   && i.types[0].bitfield.byte))));
8118           if (operand_type_check (i.types[0], imm)
8119               || (i.types[0].bitfield.instance == RegC
8120                   && i.types[0].bitfield.byte))
8121             source = 1;
8122           else
8123             source = 0;
8124           break;
8125         case 4:
8126           /* When there are 4 operands, the first two must be 8bit
8127              immediate operands. The source operand will be the 3rd
8128              one.
8129
8130              For instructions with VexNDS, if the first operand
8131              an imm8, the source operand is the 2nd one.  If the last
8132              operand is imm8, the source operand is the first one.  */
8133           gas_assert ((i.imm_operands == 2
8134                        && i.types[0].bitfield.imm8
8135                        && i.types[1].bitfield.imm8)
8136                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8137                           && i.imm_operands == 1
8138                           && (i.types[0].bitfield.imm8
8139                               || i.types[i.operands - 1].bitfield.imm8
8140                               || i.rounding.type != rc_none)));
8141           if (i.imm_operands == 2)
8142             source = 2;
8143           else
8144             {
8145               if (i.types[0].bitfield.imm8)
8146                 source = 1;
8147               else
8148                 source = 0;
8149             }
8150           break;
8151         case 5:
8152           if (is_evex_encoding (&i.tm))
8153             {
8154               /* For EVEX instructions, when there are 5 operands, the
8155                  first one must be immediate operand.  If the second one
8156                  is immediate operand, the source operand is the 3rd
8157                  one.  If the last one is immediate operand, the source
8158                  operand is the 2nd one.  */
8159               gas_assert (i.imm_operands == 2
8160                           && i.tm.opcode_modifier.sae
8161                           && operand_type_check (i.types[0], imm));
8162               if (operand_type_check (i.types[1], imm))
8163                 source = 2;
8164               else if (operand_type_check (i.types[4], imm))
8165                 source = 1;
8166               else
8167                 abort ();
8168             }
8169           break;
8170         default:
8171           abort ();
8172         }
8173
8174       if (!vex_3_sources)
8175         {
8176           dest = source + 1;
8177
8178           /* RC/SAE operand could be between DEST and SRC.  That happens
8179              when one operand is GPR and the other one is XMM/YMM/ZMM
8180              register.  */
8181           if (i.rounding.type != rc_none && i.rounding.operand == dest)
8182             dest++;
8183
8184           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8185             {
8186               /* For instructions with VexNDS, the register-only source
8187                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8188                  register.  It is encoded in VEX prefix.  */
8189
8190               i386_operand_type op;
8191               unsigned int vvvv;
8192
8193               /* Swap two source operands if needed.  */
8194               if (i.tm.opcode_modifier.swapsources)
8195                 {
8196                   vvvv = source;
8197                   source = dest;
8198                 }
8199               else
8200                 vvvv = dest;
8201
8202               op = i.tm.operand_types[vvvv];
8203               if ((dest + 1) >= i.operands
8204                   || ((op.bitfield.class != Reg
8205                        || (!op.bitfield.dword && !op.bitfield.qword))
8206                       && op.bitfield.class != RegSIMD
8207                       && !operand_type_equal (&op, &regmask)))
8208                 abort ();
8209               i.vex.register_specifier = i.op[vvvv].regs;
8210               dest++;
8211             }
8212         }
8213
8214       i.rm.mode = 3;
8215       /* One of the register operands will be encoded in the i.rm.reg
8216          field, the other in the combined i.rm.mode and i.rm.regmem
8217          fields.  If no form of this instruction supports a memory
8218          destination operand, then we assume the source operand may
8219          sometimes be a memory operand and so we need to store the
8220          destination in the i.rm.reg field.  */
8221       if (!i.tm.opcode_modifier.regmem
8222           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8223         {
8224           i.rm.reg = i.op[dest].regs->reg_num;
8225           i.rm.regmem = i.op[source].regs->reg_num;
8226           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8227           set_rex_vrex (i.op[source].regs, REX_B, false);
8228         }
8229       else
8230         {
8231           i.rm.reg = i.op[source].regs->reg_num;
8232           i.rm.regmem = i.op[dest].regs->reg_num;
8233           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8234           set_rex_vrex (i.op[source].regs, REX_R, false);
8235         }
8236       if (flag_code != CODE_64BIT && (i.rex & REX_R))
8237         {
8238           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8239             abort ();
8240           i.rex &= ~REX_R;
8241           add_prefix (LOCK_PREFIX_OPCODE);
8242         }
8243     }
8244   else
8245     {                   /* If it's not 2 reg operands...  */
8246       unsigned int mem;
8247
8248       if (i.mem_operands)
8249         {
8250           unsigned int fake_zero_displacement = 0;
8251           unsigned int op;
8252
8253           for (op = 0; op < i.operands; op++)
8254             if (i.flags[op] & Operand_Mem)
8255               break;
8256           gas_assert (op < i.operands);
8257
8258           if (i.tm.opcode_modifier.sib)
8259             {
8260               /* The index register of VSIB shouldn't be RegIZ.  */
8261               if (i.tm.opcode_modifier.sib != SIBMEM
8262                   && i.index_reg->reg_num == RegIZ)
8263                 abort ();
8264
8265               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8266               if (!i.base_reg)
8267                 {
8268                   i.sib.base = NO_BASE_REGISTER;
8269                   i.sib.scale = i.log2_scale_factor;
8270                   i.types[op].bitfield.disp8 = 0;
8271                   i.types[op].bitfield.disp16 = 0;
8272                   i.types[op].bitfield.disp64 = 0;
8273                   if (want_disp32 (&i.tm))
8274                     {
8275                       /* Must be 32 bit */
8276                       i.types[op].bitfield.disp32 = 1;
8277                       i.types[op].bitfield.disp32s = 0;
8278                     }
8279                   else
8280                     {
8281                       i.types[op].bitfield.disp32 = 0;
8282                       i.types[op].bitfield.disp32s = 1;
8283                     }
8284                 }
8285
8286               /* Since the mandatory SIB always has index register, so
8287                  the code logic remains unchanged. The non-mandatory SIB
8288                  without index register is allowed and will be handled
8289                  later.  */
8290               if (i.index_reg)
8291                 {
8292                   if (i.index_reg->reg_num == RegIZ)
8293                     i.sib.index = NO_INDEX_REGISTER;
8294                   else
8295                     i.sib.index = i.index_reg->reg_num;
8296                   set_rex_vrex (i.index_reg, REX_X, false);
8297                 }
8298             }
8299
8300           default_seg = reg_ds;
8301
8302           if (i.base_reg == 0)
8303             {
8304               i.rm.mode = 0;
8305               if (!i.disp_operands)
8306                 fake_zero_displacement = 1;
8307               if (i.index_reg == 0)
8308                 {
8309                   i386_operand_type newdisp;
8310
8311                   /* Both check for VSIB and mandatory non-vector SIB. */
8312                   gas_assert (!i.tm.opcode_modifier.sib
8313                               || i.tm.opcode_modifier.sib == SIBMEM);
8314                   /* Operand is just <disp>  */
8315                   if (flag_code == CODE_64BIT)
8316                     {
8317                       /* 64bit mode overwrites the 32bit absolute
8318                          addressing by RIP relative addressing and
8319                          absolute addressing is encoded by one of the
8320                          redundant SIB forms.  */
8321                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8322                       i.sib.base = NO_BASE_REGISTER;
8323                       i.sib.index = NO_INDEX_REGISTER;
8324                       newdisp = (want_disp32(&i.tm) ? disp32 : disp32s);
8325                     }
8326                   else if ((flag_code == CODE_16BIT)
8327                            ^ (i.prefix[ADDR_PREFIX] != 0))
8328                     {
8329                       i.rm.regmem = NO_BASE_REGISTER_16;
8330                       newdisp = disp16;
8331                     }
8332                   else
8333                     {
8334                       i.rm.regmem = NO_BASE_REGISTER;
8335                       newdisp = disp32;
8336                     }
8337                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8338                   i.types[op] = operand_type_or (i.types[op], newdisp);
8339                 }
8340               else if (!i.tm.opcode_modifier.sib)
8341                 {
8342                   /* !i.base_reg && i.index_reg  */
8343                   if (i.index_reg->reg_num == RegIZ)
8344                     i.sib.index = NO_INDEX_REGISTER;
8345                   else
8346                     i.sib.index = i.index_reg->reg_num;
8347                   i.sib.base = NO_BASE_REGISTER;
8348                   i.sib.scale = i.log2_scale_factor;
8349                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8350                   i.types[op].bitfield.disp8 = 0;
8351                   i.types[op].bitfield.disp16 = 0;
8352                   i.types[op].bitfield.disp64 = 0;
8353                   if (want_disp32 (&i.tm))
8354                     {
8355                       /* Must be 32 bit */
8356                       i.types[op].bitfield.disp32 = 1;
8357                       i.types[op].bitfield.disp32s = 0;
8358                     }
8359                   else
8360                     {
8361                       i.types[op].bitfield.disp32 = 0;
8362                       i.types[op].bitfield.disp32s = 1;
8363                     }
8364                   if ((i.index_reg->reg_flags & RegRex) != 0)
8365                     i.rex |= REX_X;
8366                 }
8367             }
8368           /* RIP addressing for 64bit mode.  */
8369           else if (i.base_reg->reg_num == RegIP)
8370             {
8371               gas_assert (!i.tm.opcode_modifier.sib);
8372               i.rm.regmem = NO_BASE_REGISTER;
8373               i.types[op].bitfield.disp8 = 0;
8374               i.types[op].bitfield.disp16 = 0;
8375               i.types[op].bitfield.disp32 = 0;
8376               i.types[op].bitfield.disp32s = 1;
8377               i.types[op].bitfield.disp64 = 0;
8378               i.flags[op] |= Operand_PCrel;
8379               if (! i.disp_operands)
8380                 fake_zero_displacement = 1;
8381             }
8382           else if (i.base_reg->reg_type.bitfield.word)
8383             {
8384               gas_assert (!i.tm.opcode_modifier.sib);
8385               switch (i.base_reg->reg_num)
8386                 {
8387                 case 3: /* (%bx)  */
8388                   if (i.index_reg == 0)
8389                     i.rm.regmem = 7;
8390                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8391                     i.rm.regmem = i.index_reg->reg_num - 6;
8392                   break;
8393                 case 5: /* (%bp)  */
8394                   default_seg = reg_ss;
8395                   if (i.index_reg == 0)
8396                     {
8397                       i.rm.regmem = 6;
8398                       if (operand_type_check (i.types[op], disp) == 0)
8399                         {
8400                           /* fake (%bp) into 0(%bp)  */
8401                           if (i.disp_encoding == disp_encoding_16bit)
8402                             i.types[op].bitfield.disp16 = 1;
8403                           else
8404                             i.types[op].bitfield.disp8 = 1;
8405                           fake_zero_displacement = 1;
8406                         }
8407                     }
8408                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8409                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8410                   break;
8411                 default: /* (%si) -> 4 or (%di) -> 5  */
8412                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8413                 }
8414               if (!fake_zero_displacement
8415                   && !i.disp_operands
8416                   && i.disp_encoding)
8417                 {
8418                   fake_zero_displacement = 1;
8419                   if (i.disp_encoding == disp_encoding_8bit)
8420                     i.types[op].bitfield.disp8 = 1;
8421                   else
8422                     i.types[op].bitfield.disp16 = 1;
8423                 }
8424               i.rm.mode = mode_from_disp_size (i.types[op]);
8425             }
8426           else /* i.base_reg and 32/64 bit mode  */
8427             {
8428               if (operand_type_check (i.types[op], disp))
8429                 {
8430                   i.types[op].bitfield.disp16 = 0;
8431                   i.types[op].bitfield.disp64 = 0;
8432                   if (!want_disp32 (&i.tm))
8433                     {
8434                       i.types[op].bitfield.disp32 = 0;
8435                       i.types[op].bitfield.disp32s = 1;
8436                     }
8437                   else
8438                     {
8439                       i.types[op].bitfield.disp32 = 1;
8440                       i.types[op].bitfield.disp32s = 0;
8441                     }
8442                 }
8443
8444               if (!i.tm.opcode_modifier.sib)
8445                 i.rm.regmem = i.base_reg->reg_num;
8446               if ((i.base_reg->reg_flags & RegRex) != 0)
8447                 i.rex |= REX_B;
8448               i.sib.base = i.base_reg->reg_num;
8449               /* x86-64 ignores REX prefix bit here to avoid decoder
8450                  complications.  */
8451               if (!(i.base_reg->reg_flags & RegRex)
8452                   && (i.base_reg->reg_num == EBP_REG_NUM
8453                    || i.base_reg->reg_num == ESP_REG_NUM))
8454                   default_seg = reg_ss;
8455               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8456                 {
8457                   fake_zero_displacement = 1;
8458                   if (i.disp_encoding == disp_encoding_32bit)
8459                     i.types[op].bitfield.disp32 = 1;
8460                   else
8461                     i.types[op].bitfield.disp8 = 1;
8462                 }
8463               i.sib.scale = i.log2_scale_factor;
8464               if (i.index_reg == 0)
8465                 {
8466                   /* Only check for VSIB. */
8467                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8468                               && i.tm.opcode_modifier.sib != VECSIB256
8469                               && i.tm.opcode_modifier.sib != VECSIB512);
8470
8471                   /* <disp>(%esp) becomes two byte modrm with no index
8472                      register.  We've already stored the code for esp
8473                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8474                      Any base register besides %esp will not use the
8475                      extra modrm byte.  */
8476                   i.sib.index = NO_INDEX_REGISTER;
8477                 }
8478               else if (!i.tm.opcode_modifier.sib)
8479                 {
8480                   if (i.index_reg->reg_num == RegIZ)
8481                     i.sib.index = NO_INDEX_REGISTER;
8482                   else
8483                     i.sib.index = i.index_reg->reg_num;
8484                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8485                   if ((i.index_reg->reg_flags & RegRex) != 0)
8486                     i.rex |= REX_X;
8487                 }
8488
8489               if (i.disp_operands
8490                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8491                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8492                 i.rm.mode = 0;
8493               else
8494                 {
8495                   if (!fake_zero_displacement
8496                       && !i.disp_operands
8497                       && i.disp_encoding)
8498                     {
8499                       fake_zero_displacement = 1;
8500                       if (i.disp_encoding == disp_encoding_8bit)
8501                         i.types[op].bitfield.disp8 = 1;
8502                       else
8503                         i.types[op].bitfield.disp32 = 1;
8504                     }
8505                   i.rm.mode = mode_from_disp_size (i.types[op]);
8506                 }
8507             }
8508
8509           if (fake_zero_displacement)
8510             {
8511               /* Fakes a zero displacement assuming that i.types[op]
8512                  holds the correct displacement size.  */
8513               expressionS *exp;
8514
8515               gas_assert (i.op[op].disps == 0);
8516               exp = &disp_expressions[i.disp_operands++];
8517               i.op[op].disps = exp;
8518               exp->X_op = O_constant;
8519               exp->X_add_number = 0;
8520               exp->X_add_symbol = (symbolS *) 0;
8521               exp->X_op_symbol = (symbolS *) 0;
8522             }
8523
8524           mem = op;
8525         }
8526       else
8527         mem = ~0;
8528
8529       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8530         {
8531           if (operand_type_check (i.types[0], imm))
8532             i.vex.register_specifier = NULL;
8533           else
8534             {
8535               /* VEX.vvvv encodes one of the sources when the first
8536                  operand is not an immediate.  */
8537               if (i.tm.opcode_modifier.vexw == VEXW0)
8538                 i.vex.register_specifier = i.op[0].regs;
8539               else
8540                 i.vex.register_specifier = i.op[1].regs;
8541             }
8542
8543           /* Destination is a XMM register encoded in the ModRM.reg
8544              and VEX.R bit.  */
8545           i.rm.reg = i.op[2].regs->reg_num;
8546           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8547             i.rex |= REX_R;
8548
8549           /* ModRM.rm and VEX.B encodes the other source.  */
8550           if (!i.mem_operands)
8551             {
8552               i.rm.mode = 3;
8553
8554               if (i.tm.opcode_modifier.vexw == VEXW0)
8555                 i.rm.regmem = i.op[1].regs->reg_num;
8556               else
8557                 i.rm.regmem = i.op[0].regs->reg_num;
8558
8559               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8560                 i.rex |= REX_B;
8561             }
8562         }
8563       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8564         {
8565           i.vex.register_specifier = i.op[2].regs;
8566           if (!i.mem_operands)
8567             {
8568               i.rm.mode = 3;
8569               i.rm.regmem = i.op[1].regs->reg_num;
8570               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8571                 i.rex |= REX_B;
8572             }
8573         }
8574       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8575          (if any) based on i.tm.extension_opcode.  Again, we must be
8576          careful to make sure that segment/control/debug/test/MMX
8577          registers are coded into the i.rm.reg field.  */
8578       else if (i.reg_operands)
8579         {
8580           unsigned int op;
8581           unsigned int vex_reg = ~0;
8582
8583           for (op = 0; op < i.operands; op++)
8584             if (i.types[op].bitfield.class == Reg
8585                 || i.types[op].bitfield.class == RegBND
8586                 || i.types[op].bitfield.class == RegMask
8587                 || i.types[op].bitfield.class == SReg
8588                 || i.types[op].bitfield.class == RegCR
8589                 || i.types[op].bitfield.class == RegDR
8590                 || i.types[op].bitfield.class == RegTR
8591                 || i.types[op].bitfield.class == RegSIMD
8592                 || i.types[op].bitfield.class == RegMMX)
8593               break;
8594
8595           if (vex_3_sources)
8596             op = dest;
8597           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8598             {
8599               /* For instructions with VexNDS, the register-only
8600                  source operand is encoded in VEX prefix. */
8601               gas_assert (mem != (unsigned int) ~0);
8602
8603               if (op > mem)
8604                 {
8605                   vex_reg = op++;
8606                   gas_assert (op < i.operands);
8607                 }
8608               else
8609                 {
8610                   /* Check register-only source operand when two source
8611                      operands are swapped.  */
8612                   if (!i.tm.operand_types[op].bitfield.baseindex
8613                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8614                     {
8615                       vex_reg = op;
8616                       op += 2;
8617                       gas_assert (mem == (vex_reg + 1)
8618                                   && op < i.operands);
8619                     }
8620                   else
8621                     {
8622                       vex_reg = op + 1;
8623                       gas_assert (vex_reg < i.operands);
8624                     }
8625                 }
8626             }
8627           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8628             {
8629               /* For instructions with VexNDD, the register destination
8630                  is encoded in VEX prefix.  */
8631               if (i.mem_operands == 0)
8632                 {
8633                   /* There is no memory operand.  */
8634                   gas_assert ((op + 2) == i.operands);
8635                   vex_reg = op + 1;
8636                 }
8637               else
8638                 {
8639                   /* There are only 2 non-immediate operands.  */
8640                   gas_assert (op < i.imm_operands + 2
8641                               && i.operands == i.imm_operands + 2);
8642                   vex_reg = i.imm_operands + 1;
8643                 }
8644             }
8645           else
8646             gas_assert (op < i.operands);
8647
8648           if (vex_reg != (unsigned int) ~0)
8649             {
8650               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8651
8652               if ((type->bitfield.class != Reg
8653                    || (!type->bitfield.dword && !type->bitfield.qword))
8654                   && type->bitfield.class != RegSIMD
8655                   && !operand_type_equal (type, &regmask))
8656                 abort ();
8657
8658               i.vex.register_specifier = i.op[vex_reg].regs;
8659             }
8660
8661           /* Don't set OP operand twice.  */
8662           if (vex_reg != op)
8663             {
8664               /* If there is an extension opcode to put here, the
8665                  register number must be put into the regmem field.  */
8666               if (i.tm.extension_opcode != None)
8667                 {
8668                   i.rm.regmem = i.op[op].regs->reg_num;
8669                   set_rex_vrex (i.op[op].regs, REX_B,
8670                                 i.tm.opcode_modifier.sse2avx);
8671                 }
8672               else
8673                 {
8674                   i.rm.reg = i.op[op].regs->reg_num;
8675                   set_rex_vrex (i.op[op].regs, REX_R,
8676                                 i.tm.opcode_modifier.sse2avx);
8677                 }
8678             }
8679
8680           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8681              must set it to 3 to indicate this is a register operand
8682              in the regmem field.  */
8683           if (!i.mem_operands)
8684             i.rm.mode = 3;
8685         }
8686
8687       /* Fill in i.rm.reg field with extension opcode (if any).  */
8688       if (i.tm.extension_opcode != None)
8689         i.rm.reg = i.tm.extension_opcode;
8690     }
8691   return default_seg;
8692 }
8693
8694 static INLINE void
8695 frag_opcode_byte (unsigned char byte)
8696 {
8697   if (now_seg != absolute_section)
8698     FRAG_APPEND_1_CHAR (byte);
8699   else
8700     ++abs_section_offset;
8701 }
8702
8703 static unsigned int
8704 flip_code16 (unsigned int code16)
8705 {
8706   gas_assert (i.tm.operands == 1);
8707
8708   return !(i.prefix[REX_PREFIX] & REX_W)
8709          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8710                       || i.tm.operand_types[0].bitfield.disp32s
8711                     : i.tm.operand_types[0].bitfield.disp16)
8712          ? CODE16 : 0;
8713 }
8714
8715 static void
8716 output_branch (void)
8717 {
8718   char *p;
8719   int size;
8720   int code16;
8721   int prefix;
8722   relax_substateT subtype;
8723   symbolS *sym;
8724   offsetT off;
8725
8726   if (now_seg == absolute_section)
8727     {
8728       as_bad (_("relaxable branches not supported in absolute section"));
8729       return;
8730     }
8731
8732   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8733   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
8734
8735   prefix = 0;
8736   if (i.prefix[DATA_PREFIX] != 0)
8737     {
8738       prefix = 1;
8739       i.prefixes -= 1;
8740       code16 ^= flip_code16(code16);
8741     }
8742   /* Pentium4 branch hints.  */
8743   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8744       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8745     {
8746       prefix++;
8747       i.prefixes--;
8748     }
8749   if (i.prefix[REX_PREFIX] != 0)
8750     {
8751       prefix++;
8752       i.prefixes--;
8753     }
8754
8755   /* BND prefixed jump.  */
8756   if (i.prefix[BND_PREFIX] != 0)
8757     {
8758       prefix++;
8759       i.prefixes--;
8760     }
8761
8762   if (i.prefixes != 0)
8763     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8764
8765   /* It's always a symbol;  End frag & setup for relax.
8766      Make sure there is enough room in this frag for the largest
8767      instruction we may generate in md_convert_frag.  This is 2
8768      bytes for the opcode and room for the prefix and largest
8769      displacement.  */
8770   frag_grow (prefix + 2 + 4);
8771   /* Prefix and 1 opcode byte go in fr_fix.  */
8772   p = frag_more (prefix + 1);
8773   if (i.prefix[DATA_PREFIX] != 0)
8774     *p++ = DATA_PREFIX_OPCODE;
8775   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8776       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8777     *p++ = i.prefix[SEG_PREFIX];
8778   if (i.prefix[BND_PREFIX] != 0)
8779     *p++ = BND_PREFIX_OPCODE;
8780   if (i.prefix[REX_PREFIX] != 0)
8781     *p++ = i.prefix[REX_PREFIX];
8782   *p = i.tm.base_opcode;
8783
8784   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8785     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8786   else if (cpu_arch_flags.bitfield.cpui386)
8787     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8788   else
8789     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8790   subtype |= code16;
8791
8792   sym = i.op[0].disps->X_add_symbol;
8793   off = i.op[0].disps->X_add_number;
8794
8795   if (i.op[0].disps->X_op != O_constant
8796       && i.op[0].disps->X_op != O_symbol)
8797     {
8798       /* Handle complex expressions.  */
8799       sym = make_expr_symbol (i.op[0].disps);
8800       off = 0;
8801     }
8802
8803   /* 1 possible extra opcode + 4 byte displacement go in var part.
8804      Pass reloc in fr_var.  */
8805   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8806 }
8807
8808 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8809 /* Return TRUE iff PLT32 relocation should be used for branching to
8810    symbol S.  */
8811
8812 static bool
8813 need_plt32_p (symbolS *s)
8814 {
8815   /* PLT32 relocation is ELF only.  */
8816   if (!IS_ELF)
8817     return false;
8818
8819 #ifdef TE_SOLARIS
8820   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8821      krtld support it.  */
8822   return false;
8823 #endif
8824
8825   /* Since there is no need to prepare for PLT branch on x86-64, we
8826      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8827      be used as a marker for 32-bit PC-relative branches.  */
8828   if (!object_64bit)
8829     return false;
8830
8831   if (s == NULL)
8832     return false;
8833
8834   /* Weak or undefined symbol need PLT32 relocation.  */
8835   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8836     return true;
8837
8838   /* Non-global symbol doesn't need PLT32 relocation.  */
8839   if (! S_IS_EXTERNAL (s))
8840     return false;
8841
8842   /* Other global symbols need PLT32 relocation.  NB: Symbol with
8843      non-default visibilities are treated as normal global symbol
8844      so that PLT32 relocation can be used as a marker for 32-bit
8845      PC-relative branches.  It is useful for linker relaxation.  */
8846   return true;
8847 }
8848 #endif
8849
8850 static void
8851 output_jump (void)
8852 {
8853   char *p;
8854   int size;
8855   fixS *fixP;
8856   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8857
8858   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8859     {
8860       /* This is a loop or jecxz type instruction.  */
8861       size = 1;
8862       if (i.prefix[ADDR_PREFIX] != 0)
8863         {
8864           frag_opcode_byte (ADDR_PREFIX_OPCODE);
8865           i.prefixes -= 1;
8866         }
8867       /* Pentium4 branch hints.  */
8868       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8869           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8870         {
8871           frag_opcode_byte (i.prefix[SEG_PREFIX]);
8872           i.prefixes--;
8873         }
8874     }
8875   else
8876     {
8877       int code16;
8878
8879       code16 = 0;
8880       if (flag_code == CODE_16BIT)
8881         code16 = CODE16;
8882
8883       if (i.prefix[DATA_PREFIX] != 0)
8884         {
8885           frag_opcode_byte (DATA_PREFIX_OPCODE);
8886           i.prefixes -= 1;
8887           code16 ^= flip_code16(code16);
8888         }
8889
8890       size = 4;
8891       if (code16)
8892         size = 2;
8893     }
8894
8895   /* BND prefixed jump.  */
8896   if (i.prefix[BND_PREFIX] != 0)
8897     {
8898       frag_opcode_byte (i.prefix[BND_PREFIX]);
8899       i.prefixes -= 1;
8900     }
8901
8902   if (i.prefix[REX_PREFIX] != 0)
8903     {
8904       frag_opcode_byte (i.prefix[REX_PREFIX]);
8905       i.prefixes -= 1;
8906     }
8907
8908   if (i.prefixes != 0)
8909     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8910
8911   if (now_seg == absolute_section)
8912     {
8913       abs_section_offset += i.opcode_length + size;
8914       return;
8915     }
8916
8917   p = frag_more (i.opcode_length + size);
8918   switch (i.opcode_length)
8919     {
8920     case 2:
8921       *p++ = i.tm.base_opcode >> 8;
8922       /* Fall through.  */
8923     case 1:
8924       *p++ = i.tm.base_opcode;
8925       break;
8926     default:
8927       abort ();
8928     }
8929
8930 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8931   if (size == 4
8932       && jump_reloc == NO_RELOC
8933       && need_plt32_p (i.op[0].disps->X_add_symbol))
8934     jump_reloc = BFD_RELOC_X86_64_PLT32;
8935 #endif
8936
8937   jump_reloc = reloc (size, 1, 1, jump_reloc);
8938
8939   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8940                       i.op[0].disps, 1, jump_reloc);
8941
8942   /* All jumps handled here are signed, but don't unconditionally use a
8943      signed limit check for 32 and 16 bit jumps as we want to allow wrap
8944      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8945      respectively.  */
8946   switch (size)
8947     {
8948     case 1:
8949       fixP->fx_signed = 1;
8950       break;
8951
8952     case 2:
8953       if (i.tm.base_opcode == 0xc7f8)
8954         fixP->fx_signed = 1;
8955       break;
8956
8957     case 4:
8958       if (flag_code == CODE_64BIT)
8959         fixP->fx_signed = 1;
8960       break;
8961     }
8962 }
8963
8964 static void
8965 output_interseg_jump (void)
8966 {
8967   char *p;
8968   int size;
8969   int prefix;
8970   int code16;
8971
8972   code16 = 0;
8973   if (flag_code == CODE_16BIT)
8974     code16 = CODE16;
8975
8976   prefix = 0;
8977   if (i.prefix[DATA_PREFIX] != 0)
8978     {
8979       prefix = 1;
8980       i.prefixes -= 1;
8981       code16 ^= CODE16;
8982     }
8983
8984   gas_assert (!i.prefix[REX_PREFIX]);
8985
8986   size = 4;
8987   if (code16)
8988     size = 2;
8989
8990   if (i.prefixes != 0)
8991     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8992
8993   if (now_seg == absolute_section)
8994     {
8995       abs_section_offset += prefix + 1 + 2 + size;
8996       return;
8997     }
8998
8999   /* 1 opcode; 2 segment; offset  */
9000   p = frag_more (prefix + 1 + 2 + size);
9001
9002   if (i.prefix[DATA_PREFIX] != 0)
9003     *p++ = DATA_PREFIX_OPCODE;
9004
9005   if (i.prefix[REX_PREFIX] != 0)
9006     *p++ = i.prefix[REX_PREFIX];
9007
9008   *p++ = i.tm.base_opcode;
9009   if (i.op[1].imms->X_op == O_constant)
9010     {
9011       offsetT n = i.op[1].imms->X_add_number;
9012
9013       if (size == 2
9014           && !fits_in_unsigned_word (n)
9015           && !fits_in_signed_word (n))
9016         {
9017           as_bad (_("16-bit jump out of range"));
9018           return;
9019         }
9020       md_number_to_chars (p, n, size);
9021     }
9022   else
9023     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9024                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9025
9026   p += size;
9027   if (i.op[0].imms->X_op == O_constant)
9028     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9029   else
9030     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9031                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9032 }
9033
9034 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9035 void
9036 x86_cleanup (void)
9037 {
9038   char *p;
9039   asection *seg = now_seg;
9040   subsegT subseg = now_subseg;
9041   asection *sec;
9042   unsigned int alignment, align_size_1;
9043   unsigned int isa_1_descsz, feature_2_descsz, descsz;
9044   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9045   unsigned int padding;
9046
9047   if (!IS_ELF || !x86_used_note)
9048     return;
9049
9050   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9051
9052   /* The .note.gnu.property section layout:
9053
9054      Field      Length          Contents
9055      ----       ----            ----
9056      n_namsz    4               4
9057      n_descsz   4               The note descriptor size
9058      n_type     4               NT_GNU_PROPERTY_TYPE_0
9059      n_name     4               "GNU"
9060      n_desc     n_descsz        The program property array
9061      ....       ....            ....
9062    */
9063
9064   /* Create the .note.gnu.property section.  */
9065   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9066   bfd_set_section_flags (sec,
9067                          (SEC_ALLOC
9068                           | SEC_LOAD
9069                           | SEC_DATA
9070                           | SEC_HAS_CONTENTS
9071                           | SEC_READONLY));
9072
9073   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9074     {
9075       align_size_1 = 7;
9076       alignment = 3;
9077     }
9078   else
9079     {
9080       align_size_1 = 3;
9081       alignment = 2;
9082     }
9083
9084   bfd_set_section_alignment (sec, alignment);
9085   elf_section_type (sec) = SHT_NOTE;
9086
9087   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9088                                   + 4-byte data  */
9089   isa_1_descsz_raw = 4 + 4 + 4;
9090   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
9091   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9092
9093   feature_2_descsz_raw = isa_1_descsz;
9094   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9095                                       + 4-byte data  */
9096   feature_2_descsz_raw += 4 + 4 + 4;
9097   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
9098   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9099                       & ~align_size_1);
9100
9101   descsz = feature_2_descsz;
9102   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
9103   p = frag_more (4 + 4 + 4 + 4 + descsz);
9104
9105   /* Write n_namsz.  */
9106   md_number_to_chars (p, (valueT) 4, 4);
9107
9108   /* Write n_descsz.  */
9109   md_number_to_chars (p + 4, (valueT) descsz, 4);
9110
9111   /* Write n_type.  */
9112   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9113
9114   /* Write n_name.  */
9115   memcpy (p + 4 * 3, "GNU", 4);
9116
9117   /* Write 4-byte type.  */
9118   md_number_to_chars (p + 4 * 4,
9119                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9120
9121   /* Write 4-byte data size.  */
9122   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9123
9124   /* Write 4-byte data.  */
9125   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9126
9127   /* Zero out paddings.  */
9128   padding = isa_1_descsz - isa_1_descsz_raw;
9129   if (padding)
9130     memset (p + 4 * 7, 0, padding);
9131
9132   /* Write 4-byte type.  */
9133   md_number_to_chars (p + isa_1_descsz + 4 * 4,
9134                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9135
9136   /* Write 4-byte data size.  */
9137   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9138
9139   /* Write 4-byte data.  */
9140   md_number_to_chars (p + isa_1_descsz + 4 * 6,
9141                       (valueT) x86_feature_2_used, 4);
9142
9143   /* Zero out paddings.  */
9144   padding = feature_2_descsz - feature_2_descsz_raw;
9145   if (padding)
9146     memset (p + isa_1_descsz + 4 * 7, 0, padding);
9147
9148   /* We probably can't restore the current segment, for there likely
9149      isn't one yet...  */
9150   if (seg && subseg)
9151     subseg_set (seg, subseg);
9152 }
9153 #endif
9154
9155 static unsigned int
9156 encoding_length (const fragS *start_frag, offsetT start_off,
9157                  const char *frag_now_ptr)
9158 {
9159   unsigned int len = 0;
9160
9161   if (start_frag != frag_now)
9162     {
9163       const fragS *fr = start_frag;
9164
9165       do {
9166         len += fr->fr_fix;
9167         fr = fr->fr_next;
9168       } while (fr && fr != frag_now);
9169     }
9170
9171   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9172 }
9173
9174 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9175    be macro-fused with conditional jumps.
9176    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
9177    or is one of the following format:
9178
9179     cmp m, imm
9180     add m, imm
9181     sub m, imm
9182    test m, imm
9183     and m, imm
9184     inc m
9185     dec m
9186
9187    it is unfusible.  */
9188
9189 static int
9190 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9191 {
9192   /* No RIP address.  */
9193   if (i.base_reg && i.base_reg->reg_num == RegIP)
9194     return 0;
9195
9196   /* No opcodes outside of base encoding space.  */
9197   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9198     return 0;
9199
9200   /* add, sub without add/sub m, imm.  */
9201   if (i.tm.base_opcode <= 5
9202       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9203       || ((i.tm.base_opcode | 3) == 0x83
9204           && (i.tm.extension_opcode == 0x5
9205               || i.tm.extension_opcode == 0x0)))
9206     {
9207       *mf_cmp_p = mf_cmp_alu_cmp;
9208       return !(i.mem_operands && i.imm_operands);
9209     }
9210
9211   /* and without and m, imm.  */
9212   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9213       || ((i.tm.base_opcode | 3) == 0x83
9214           && i.tm.extension_opcode == 0x4))
9215     {
9216       *mf_cmp_p = mf_cmp_test_and;
9217       return !(i.mem_operands && i.imm_operands);
9218     }
9219
9220   /* test without test m imm.  */
9221   if ((i.tm.base_opcode | 1) == 0x85
9222       || (i.tm.base_opcode | 1) == 0xa9
9223       || ((i.tm.base_opcode | 1) == 0xf7
9224           && i.tm.extension_opcode == 0))
9225     {
9226       *mf_cmp_p = mf_cmp_test_and;
9227       return !(i.mem_operands && i.imm_operands);
9228     }
9229
9230   /* cmp without cmp m, imm.  */
9231   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9232       || ((i.tm.base_opcode | 3) == 0x83
9233           && (i.tm.extension_opcode == 0x7)))
9234     {
9235       *mf_cmp_p = mf_cmp_alu_cmp;
9236       return !(i.mem_operands && i.imm_operands);
9237     }
9238
9239   /* inc, dec without inc/dec m.   */
9240   if ((i.tm.cpu_flags.bitfield.cpuno64
9241        && (i.tm.base_opcode | 0xf) == 0x4f)
9242       || ((i.tm.base_opcode | 1) == 0xff
9243           && i.tm.extension_opcode <= 0x1))
9244     {
9245       *mf_cmp_p = mf_cmp_incdec;
9246       return !i.mem_operands;
9247     }
9248
9249   return 0;
9250 }
9251
9252 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
9253
9254 static int
9255 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9256 {
9257   /* NB: Don't work with COND_JUMP86 without i386.  */
9258   if (!align_branch_power
9259       || now_seg == absolute_section
9260       || !cpu_arch_flags.bitfield.cpui386
9261       || !(align_branch & align_branch_fused_bit))
9262     return 0;
9263
9264   if (maybe_fused_with_jcc_p (mf_cmp_p))
9265     {
9266       if (last_insn.kind == last_insn_other
9267           || last_insn.seg != now_seg)
9268         return 1;
9269       if (flag_debug)
9270         as_warn_where (last_insn.file, last_insn.line,
9271                        _("`%s` skips -malign-branch-boundary on `%s`"),
9272                        last_insn.name, i.tm.name);
9273     }
9274
9275   return 0;
9276 }
9277
9278 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
9279
9280 static int
9281 add_branch_prefix_frag_p (void)
9282 {
9283   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
9284      to PadLock instructions since they include prefixes in opcode.  */
9285   if (!align_branch_power
9286       || !align_branch_prefix_size
9287       || now_seg == absolute_section
9288       || i.tm.cpu_flags.bitfield.cpupadlock
9289       || !cpu_arch_flags.bitfield.cpui386)
9290     return 0;
9291
9292   /* Don't add prefix if it is a prefix or there is no operand in case
9293      that segment prefix is special.  */
9294   if (!i.operands || i.tm.opcode_modifier.isprefix)
9295     return 0;
9296
9297   if (last_insn.kind == last_insn_other
9298       || last_insn.seg != now_seg)
9299     return 1;
9300
9301   if (flag_debug)
9302     as_warn_where (last_insn.file, last_insn.line,
9303                    _("`%s` skips -malign-branch-boundary on `%s`"),
9304                    last_insn.name, i.tm.name);
9305
9306   return 0;
9307 }
9308
9309 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
9310
9311 static int
9312 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9313                            enum mf_jcc_kind *mf_jcc_p)
9314 {
9315   int add_padding;
9316
9317   /* NB: Don't work with COND_JUMP86 without i386.  */
9318   if (!align_branch_power
9319       || now_seg == absolute_section
9320       || !cpu_arch_flags.bitfield.cpui386
9321       || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9322     return 0;
9323
9324   add_padding = 0;
9325
9326   /* Check for jcc and direct jmp.  */
9327   if (i.tm.opcode_modifier.jump == JUMP)
9328     {
9329       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9330         {
9331           *branch_p = align_branch_jmp;
9332           add_padding = align_branch & align_branch_jmp_bit;
9333         }
9334       else
9335         {
9336           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9337              igore the lowest bit.  */
9338           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9339           *branch_p = align_branch_jcc;
9340           if ((align_branch & align_branch_jcc_bit))
9341             add_padding = 1;
9342         }
9343     }
9344   else if ((i.tm.base_opcode | 1) == 0xc3)
9345     {
9346       /* Near ret.  */
9347       *branch_p = align_branch_ret;
9348       if ((align_branch & align_branch_ret_bit))
9349         add_padding = 1;
9350     }
9351   else
9352     {
9353       /* Check for indirect jmp, direct and indirect calls.  */
9354       if (i.tm.base_opcode == 0xe8)
9355         {
9356           /* Direct call.  */
9357           *branch_p = align_branch_call;
9358           if ((align_branch & align_branch_call_bit))
9359             add_padding = 1;
9360         }
9361       else if (i.tm.base_opcode == 0xff
9362                && (i.tm.extension_opcode == 2
9363                    || i.tm.extension_opcode == 4))
9364         {
9365           /* Indirect call and jmp.  */
9366           *branch_p = align_branch_indirect;
9367           if ((align_branch & align_branch_indirect_bit))
9368             add_padding = 1;
9369         }
9370
9371       if (add_padding
9372           && i.disp_operands
9373           && tls_get_addr
9374           && (i.op[0].disps->X_op == O_symbol
9375               || (i.op[0].disps->X_op == O_subtract
9376                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9377         {
9378           symbolS *s = i.op[0].disps->X_add_symbol;
9379           /* No padding to call to global or undefined tls_get_addr.  */
9380           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9381               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9382             return 0;
9383         }
9384     }
9385
9386   if (add_padding
9387       && last_insn.kind != last_insn_other
9388       && last_insn.seg == now_seg)
9389     {
9390       if (flag_debug)
9391         as_warn_where (last_insn.file, last_insn.line,
9392                        _("`%s` skips -malign-branch-boundary on `%s`"),
9393                        last_insn.name, i.tm.name);
9394       return 0;
9395     }
9396
9397   return add_padding;
9398 }
9399
9400 static void
9401 output_insn (void)
9402 {
9403   fragS *insn_start_frag;
9404   offsetT insn_start_off;
9405   fragS *fragP = NULL;
9406   enum align_branch_kind branch = align_branch_none;
9407   /* The initializer is arbitrary just to avoid uninitialized error.
9408      it's actually either assigned in add_branch_padding_frag_p
9409      or never be used.  */
9410   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9411
9412 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9413   if (IS_ELF && x86_used_note && now_seg != absolute_section)
9414     {
9415       if ((i.xstate & xstate_tmm) == xstate_tmm
9416           || i.tm.cpu_flags.bitfield.cpuamx_tile)
9417         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9418
9419       if (i.tm.cpu_flags.bitfield.cpu8087
9420           || i.tm.cpu_flags.bitfield.cpu287
9421           || i.tm.cpu_flags.bitfield.cpu387
9422           || i.tm.cpu_flags.bitfield.cpu687
9423           || i.tm.cpu_flags.bitfield.cpufisttp)
9424         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9425
9426       if ((i.xstate & xstate_mmx)
9427           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9428               && !is_any_vex_encoding (&i.tm)
9429               && (i.tm.base_opcode == 0x77 /* emms */
9430                   || i.tm.base_opcode == 0x0e /* femms */)))
9431         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9432
9433       if (i.index_reg)
9434         {
9435           if (i.index_reg->reg_type.bitfield.zmmword)
9436             i.xstate |= xstate_zmm;
9437           else if (i.index_reg->reg_type.bitfield.ymmword)
9438             i.xstate |= xstate_ymm;
9439           else if (i.index_reg->reg_type.bitfield.xmmword)
9440             i.xstate |= xstate_xmm;
9441         }
9442
9443       /* vzeroall / vzeroupper */
9444       if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9445         i.xstate |= xstate_ymm;
9446
9447       if ((i.xstate & xstate_xmm)
9448           /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9449           || (i.tm.base_opcode == 0xae
9450               && (i.tm.cpu_flags.bitfield.cpusse
9451                   || i.tm.cpu_flags.bitfield.cpuavx))
9452           || i.tm.cpu_flags.bitfield.cpuwidekl
9453           || i.tm.cpu_flags.bitfield.cpukl)
9454         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9455
9456       if ((i.xstate & xstate_ymm) == xstate_ymm)
9457         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9458       if ((i.xstate & xstate_zmm) == xstate_zmm)
9459         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9460       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9461         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9462       if (i.tm.cpu_flags.bitfield.cpufxsr)
9463         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9464       if (i.tm.cpu_flags.bitfield.cpuxsave)
9465         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9466       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9467         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9468       if (i.tm.cpu_flags.bitfield.cpuxsavec)
9469         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9470
9471       if (x86_feature_2_used
9472           || i.tm.cpu_flags.bitfield.cpucmov
9473           || i.tm.cpu_flags.bitfield.cpusyscall
9474           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9475               && i.tm.base_opcode == 0xc7
9476               && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9477               && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9478         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9479       if (i.tm.cpu_flags.bitfield.cpusse3
9480           || i.tm.cpu_flags.bitfield.cpussse3
9481           || i.tm.cpu_flags.bitfield.cpusse4_1
9482           || i.tm.cpu_flags.bitfield.cpusse4_2
9483           || i.tm.cpu_flags.bitfield.cpucx16
9484           || i.tm.cpu_flags.bitfield.cpupopcnt
9485           /* LAHF-SAHF insns in 64-bit mode.  */
9486           || (flag_code == CODE_64BIT
9487               && (i.tm.base_opcode | 1) == 0x9f
9488               && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9489         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9490       if (i.tm.cpu_flags.bitfield.cpuavx
9491           || i.tm.cpu_flags.bitfield.cpuavx2
9492           /* Any VEX encoded insns execpt for CpuAVX512F, CpuAVX512BW,
9493              CpuAVX512DQ, LPW, TBM and AMX.  */
9494           || (i.tm.opcode_modifier.vex
9495               && !i.tm.cpu_flags.bitfield.cpuavx512f
9496               && !i.tm.cpu_flags.bitfield.cpuavx512bw
9497               && !i.tm.cpu_flags.bitfield.cpuavx512dq
9498               && !i.tm.cpu_flags.bitfield.cpulwp
9499               && !i.tm.cpu_flags.bitfield.cputbm
9500               && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9501           || i.tm.cpu_flags.bitfield.cpuf16c
9502           || i.tm.cpu_flags.bitfield.cpufma
9503           || i.tm.cpu_flags.bitfield.cpulzcnt
9504           || i.tm.cpu_flags.bitfield.cpumovbe
9505           || i.tm.cpu_flags.bitfield.cpuxsaves
9506           || (x86_feature_2_used
9507               & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9508                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9509                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9510         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9511       if (i.tm.cpu_flags.bitfield.cpuavx512f
9512           || i.tm.cpu_flags.bitfield.cpuavx512bw
9513           || i.tm.cpu_flags.bitfield.cpuavx512dq
9514           || i.tm.cpu_flags.bitfield.cpuavx512vl
9515           /* Any EVEX encoded insns except for AVX512ER, AVX512PF and
9516              VNNIW.  */
9517           || (i.tm.opcode_modifier.evex
9518               && !i.tm.cpu_flags.bitfield.cpuavx512er
9519               && !i.tm.cpu_flags.bitfield.cpuavx512pf
9520               && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9521         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9522     }
9523 #endif
9524
9525   /* Tie dwarf2 debug info to the address at the start of the insn.
9526      We can't do this after the insn has been output as the current
9527      frag may have been closed off.  eg. by frag_var.  */
9528   dwarf2_emit_insn (0);
9529
9530   insn_start_frag = frag_now;
9531   insn_start_off = frag_now_fix ();
9532
9533   if (add_branch_padding_frag_p (&branch, &mf_jcc))
9534     {
9535       char *p;
9536       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
9537       unsigned int max_branch_padding_size = 14;
9538
9539       /* Align section to boundary.  */
9540       record_alignment (now_seg, align_branch_power);
9541
9542       /* Make room for padding.  */
9543       frag_grow (max_branch_padding_size);
9544
9545       /* Start of the padding.  */
9546       p = frag_more (0);
9547
9548       fragP = frag_now;
9549
9550       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9551                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9552                 NULL, 0, p);
9553
9554       fragP->tc_frag_data.mf_type = mf_jcc;
9555       fragP->tc_frag_data.branch_type = branch;
9556       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9557     }
9558
9559   /* Output jumps.  */
9560   if (i.tm.opcode_modifier.jump == JUMP)
9561     output_branch ();
9562   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9563            || i.tm.opcode_modifier.jump == JUMP_DWORD)
9564     output_jump ();
9565   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9566     output_interseg_jump ();
9567   else
9568     {
9569       /* Output normal instructions here.  */
9570       char *p;
9571       unsigned char *q;
9572       unsigned int j;
9573       enum mf_cmp_kind mf_cmp;
9574
9575       if (avoid_fence
9576           && (i.tm.base_opcode == 0xaee8
9577               || i.tm.base_opcode == 0xaef0
9578               || i.tm.base_opcode == 0xaef8))
9579         {
9580           /* Encode lfence, mfence, and sfence as
9581              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
9582           if (now_seg != absolute_section)
9583             {
9584               offsetT val = 0x240483f0ULL;
9585
9586               p = frag_more (5);
9587               md_number_to_chars (p, val, 5);
9588             }
9589           else
9590             abs_section_offset += 5;
9591           return;
9592         }
9593
9594       /* Some processors fail on the LOCK prefix.  This option makes the
9595          assembler ignore the LOCK prefix and serves as a workaround.  */
9596       if (omit_lock_prefix)
9597         {
9598           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9599               && i.tm.opcode_modifier.isprefix)
9600             return;
9601           i.prefix[LOCK_PREFIX] = 0;
9602         }
9603
9604       if (branch)
9605         /* Skip if this is a branch.  */
9606         ;
9607       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9608         {
9609           /* Make room for padding.  */
9610           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9611           p = frag_more (0);
9612
9613           fragP = frag_now;
9614
9615           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9616                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9617                     NULL, 0, p);
9618
9619           fragP->tc_frag_data.mf_type = mf_cmp;
9620           fragP->tc_frag_data.branch_type = align_branch_fused;
9621           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9622         }
9623       else if (add_branch_prefix_frag_p ())
9624         {
9625           unsigned int max_prefix_size = align_branch_prefix_size;
9626
9627           /* Make room for padding.  */
9628           frag_grow (max_prefix_size);
9629           p = frag_more (0);
9630
9631           fragP = frag_now;
9632
9633           frag_var (rs_machine_dependent, max_prefix_size, 0,
9634                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9635                     NULL, 0, p);
9636
9637           fragP->tc_frag_data.max_bytes = max_prefix_size;
9638         }
9639
9640       /* Since the VEX/EVEX prefix contains the implicit prefix, we
9641          don't need the explicit prefix.  */
9642       if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
9643         {
9644           switch (i.tm.opcode_modifier.opcodeprefix)
9645             {
9646             case PREFIX_0X66:
9647               add_prefix (0x66);
9648               break;
9649             case PREFIX_0XF2:
9650               add_prefix (0xf2);
9651               break;
9652             case PREFIX_0XF3:
9653               if (!i.tm.cpu_flags.bitfield.cpupadlock
9654                   || (i.prefix[REP_PREFIX] != 0xf3))
9655                 add_prefix (0xf3);
9656               break;
9657             case PREFIX_NONE:
9658               switch (i.opcode_length)
9659                 {
9660                 case 2:
9661                   break;
9662                 case 1:
9663                   /* Check for pseudo prefixes.  */
9664                   if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9665                     break;
9666                   as_bad_where (insn_start_frag->fr_file,
9667                                 insn_start_frag->fr_line,
9668                                 _("pseudo prefix without instruction"));
9669                   return;
9670                 default:
9671                   abort ();
9672                 }
9673               break;
9674             default:
9675               abort ();
9676             }
9677
9678 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9679           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9680              R_X86_64_GOTTPOFF relocation so that linker can safely
9681              perform IE->LE optimization.  A dummy REX_OPCODE prefix
9682              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9683              relocation for GDesc -> IE/LE optimization.  */
9684           if (x86_elf_abi == X86_64_X32_ABI
9685               && i.operands == 2
9686               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9687                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9688               && i.prefix[REX_PREFIX] == 0)
9689             add_prefix (REX_OPCODE);
9690 #endif
9691
9692           /* The prefix bytes.  */
9693           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9694             if (*q)
9695               frag_opcode_byte (*q);
9696         }
9697       else
9698         {
9699           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9700             if (*q)
9701               switch (j)
9702                 {
9703                 case SEG_PREFIX:
9704                 case ADDR_PREFIX:
9705                   frag_opcode_byte (*q);
9706                   break;
9707                 default:
9708                   /* There should be no other prefixes for instructions
9709                      with VEX prefix.  */
9710                   abort ();
9711                 }
9712
9713           /* For EVEX instructions i.vrex should become 0 after
9714              build_evex_prefix.  For VEX instructions upper 16 registers
9715              aren't available, so VREX should be 0.  */
9716           if (i.vrex)
9717             abort ();
9718           /* Now the VEX prefix.  */
9719           if (now_seg != absolute_section)
9720             {
9721               p = frag_more (i.vex.length);
9722               for (j = 0; j < i.vex.length; j++)
9723                 p[j] = i.vex.bytes[j];
9724             }
9725           else
9726             abs_section_offset += i.vex.length;
9727         }
9728
9729       /* Now the opcode; be careful about word order here!  */
9730       j = i.opcode_length;
9731       if (!i.vex.length)
9732         switch (i.tm.opcode_modifier.opcodespace)
9733           {
9734           case SPACE_BASE:
9735             break;
9736           case SPACE_0F:
9737             ++j;
9738             break;
9739           case SPACE_0F38:
9740           case SPACE_0F3A:
9741             j += 2;
9742             break;
9743           default:
9744             abort ();
9745           }
9746
9747       if (now_seg == absolute_section)
9748         abs_section_offset += j;
9749       else if (j == 1)
9750         {
9751           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9752         }
9753       else
9754         {
9755           p = frag_more (j);
9756           if (!i.vex.length
9757               && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9758             {
9759               *p++ = 0x0f;
9760               if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9761                 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9762                        ? 0x38 : 0x3a;
9763             }
9764
9765           switch (i.opcode_length)
9766             {
9767             case 2:
9768               /* Put out high byte first: can't use md_number_to_chars!  */
9769               *p++ = (i.tm.base_opcode >> 8) & 0xff;
9770               /* Fall through.  */
9771             case 1:
9772               *p = i.tm.base_opcode & 0xff;
9773               break;
9774             default:
9775               abort ();
9776               break;
9777             }
9778
9779         }
9780
9781       /* Now the modrm byte and sib byte (if present).  */
9782       if (i.tm.opcode_modifier.modrm)
9783         {
9784           frag_opcode_byte ((i.rm.regmem << 0)
9785                              | (i.rm.reg << 3)
9786                              | (i.rm.mode << 6));
9787           /* If i.rm.regmem == ESP (4)
9788              && i.rm.mode != (Register mode)
9789              && not 16 bit
9790              ==> need second modrm byte.  */
9791           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9792               && i.rm.mode != 3
9793               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9794             frag_opcode_byte ((i.sib.base << 0)
9795                               | (i.sib.index << 3)
9796                               | (i.sib.scale << 6));
9797         }
9798
9799       if (i.disp_operands)
9800         output_disp (insn_start_frag, insn_start_off);
9801
9802       if (i.imm_operands)
9803         output_imm (insn_start_frag, insn_start_off);
9804
9805       /*
9806        * frag_now_fix () returning plain abs_section_offset when we're in the
9807        * absolute section, and abs_section_offset not getting updated as data
9808        * gets added to the frag breaks the logic below.
9809        */
9810       if (now_seg != absolute_section)
9811         {
9812           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9813           if (j > 15)
9814             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9815                      j);
9816           else if (fragP)
9817             {
9818               /* NB: Don't add prefix with GOTPC relocation since
9819                  output_disp() above depends on the fixed encoding
9820                  length.  Can't add prefix with TLS relocation since
9821                  it breaks TLS linker optimization.  */
9822               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9823               /* Prefix count on the current instruction.  */
9824               unsigned int count = i.vex.length;
9825               unsigned int k;
9826               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9827                 /* REX byte is encoded in VEX/EVEX prefix.  */
9828                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9829                   count++;
9830
9831               /* Count prefixes for extended opcode maps.  */
9832               if (!i.vex.length)
9833                 switch (i.tm.opcode_modifier.opcodespace)
9834                   {
9835                   case SPACE_BASE:
9836                     break;
9837                   case SPACE_0F:
9838                     count++;
9839                     break;
9840                   case SPACE_0F38:
9841                   case SPACE_0F3A:
9842                     count += 2;
9843                     break;
9844                   default:
9845                     abort ();
9846                   }
9847
9848               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9849                   == BRANCH_PREFIX)
9850                 {
9851                   /* Set the maximum prefix size in BRANCH_PREFIX
9852                      frag.  */
9853                   if (fragP->tc_frag_data.max_bytes > max)
9854                     fragP->tc_frag_data.max_bytes = max;
9855                   if (fragP->tc_frag_data.max_bytes > count)
9856                     fragP->tc_frag_data.max_bytes -= count;
9857                   else
9858                     fragP->tc_frag_data.max_bytes = 0;
9859                 }
9860               else
9861                 {
9862                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
9863                      frag.  */
9864                   unsigned int max_prefix_size;
9865                   if (align_branch_prefix_size > max)
9866                     max_prefix_size = max;
9867                   else
9868                     max_prefix_size = align_branch_prefix_size;
9869                   if (max_prefix_size > count)
9870                     fragP->tc_frag_data.max_prefix_length
9871                       = max_prefix_size - count;
9872                 }
9873
9874               /* Use existing segment prefix if possible.  Use CS
9875                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
9876                  segment prefix with ESP/EBP base register and use DS
9877                  segment prefix without ESP/EBP base register.  */
9878               if (i.prefix[SEG_PREFIX])
9879                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9880               else if (flag_code == CODE_64BIT)
9881                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9882               else if (i.base_reg
9883                        && (i.base_reg->reg_num == 4
9884                            || i.base_reg->reg_num == 5))
9885                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9886               else
9887                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9888             }
9889         }
9890     }
9891
9892   /* NB: Don't work with COND_JUMP86 without i386.  */
9893   if (align_branch_power
9894       && now_seg != absolute_section
9895       && cpu_arch_flags.bitfield.cpui386)
9896     {
9897       /* Terminate each frag so that we can add prefix and check for
9898          fused jcc.  */
9899       frag_wane (frag_now);
9900       frag_new (0);
9901     }
9902
9903 #ifdef DEBUG386
9904   if (flag_debug)
9905     {
9906       pi ("" /*line*/, &i);
9907     }
9908 #endif /* DEBUG386  */
9909 }
9910
9911 /* Return the size of the displacement operand N.  */
9912
9913 static int
9914 disp_size (unsigned int n)
9915 {
9916   int size = 4;
9917
9918   if (i.types[n].bitfield.disp64)
9919     size = 8;
9920   else if (i.types[n].bitfield.disp8)
9921     size = 1;
9922   else if (i.types[n].bitfield.disp16)
9923     size = 2;
9924   return size;
9925 }
9926
9927 /* Return the size of the immediate operand N.  */
9928
9929 static int
9930 imm_size (unsigned int n)
9931 {
9932   int size = 4;
9933   if (i.types[n].bitfield.imm64)
9934     size = 8;
9935   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9936     size = 1;
9937   else if (i.types[n].bitfield.imm16)
9938     size = 2;
9939   return size;
9940 }
9941
9942 static void
9943 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9944 {
9945   char *p;
9946   unsigned int n;
9947
9948   for (n = 0; n < i.operands; n++)
9949     {
9950       if (operand_type_check (i.types[n], disp))
9951         {
9952           int size = disp_size (n);
9953
9954           if (now_seg == absolute_section)
9955             abs_section_offset += size;
9956           else if (i.op[n].disps->X_op == O_constant)
9957             {
9958               offsetT val = i.op[n].disps->X_add_number;
9959
9960               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9961                                      size);
9962               p = frag_more (size);
9963               md_number_to_chars (p, val, size);
9964             }
9965           else
9966             {
9967               enum bfd_reloc_code_real reloc_type;
9968               int sign = i.types[n].bitfield.disp32s;
9969               int pcrel = (i.flags[n] & Operand_PCrel) != 0;
9970               fixS *fixP;
9971
9972               /* We can't have 8 bit displacement here.  */
9973               gas_assert (!i.types[n].bitfield.disp8);
9974
9975               /* The PC relative address is computed relative
9976                  to the instruction boundary, so in case immediate
9977                  fields follows, we need to adjust the value.  */
9978               if (pcrel && i.imm_operands)
9979                 {
9980                   unsigned int n1;
9981                   int sz = 0;
9982
9983                   for (n1 = 0; n1 < i.operands; n1++)
9984                     if (operand_type_check (i.types[n1], imm))
9985                       {
9986                         /* Only one immediate is allowed for PC
9987                            relative address.  */
9988                         gas_assert (sz == 0);
9989                         sz = imm_size (n1);
9990                         i.op[n].disps->X_add_number -= sz;
9991                       }
9992                   /* We should find the immediate.  */
9993                   gas_assert (sz != 0);
9994                 }
9995
9996               p = frag_more (size);
9997               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9998               if (GOT_symbol
9999                   && GOT_symbol == i.op[n].disps->X_add_symbol
10000                   && (((reloc_type == BFD_RELOC_32
10001                         || reloc_type == BFD_RELOC_X86_64_32S
10002                         || (reloc_type == BFD_RELOC_64
10003                             && object_64bit))
10004                        && (i.op[n].disps->X_op == O_symbol
10005                            || (i.op[n].disps->X_op == O_add
10006                                && ((symbol_get_value_expression
10007                                     (i.op[n].disps->X_op_symbol)->X_op)
10008                                    == O_subtract))))
10009                       || reloc_type == BFD_RELOC_32_PCREL))
10010                 {
10011                   if (!object_64bit)
10012                     {
10013                       reloc_type = BFD_RELOC_386_GOTPC;
10014                       i.has_gotpc_tls_reloc = true;
10015                       i.op[n].disps->X_add_number +=
10016                         encoding_length (insn_start_frag, insn_start_off, p);
10017                     }
10018                   else if (reloc_type == BFD_RELOC_64)
10019                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10020                   else
10021                     /* Don't do the adjustment for x86-64, as there
10022                        the pcrel addressing is relative to the _next_
10023                        insn, and that is taken care of in other code.  */
10024                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10025                 }
10026               else if (align_branch_power)
10027                 {
10028                   switch (reloc_type)
10029                     {
10030                     case BFD_RELOC_386_TLS_GD:
10031                     case BFD_RELOC_386_TLS_LDM:
10032                     case BFD_RELOC_386_TLS_IE:
10033                     case BFD_RELOC_386_TLS_IE_32:
10034                     case BFD_RELOC_386_TLS_GOTIE:
10035                     case BFD_RELOC_386_TLS_GOTDESC:
10036                     case BFD_RELOC_386_TLS_DESC_CALL:
10037                     case BFD_RELOC_X86_64_TLSGD:
10038                     case BFD_RELOC_X86_64_TLSLD:
10039                     case BFD_RELOC_X86_64_GOTTPOFF:
10040                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10041                     case BFD_RELOC_X86_64_TLSDESC_CALL:
10042                       i.has_gotpc_tls_reloc = true;
10043                     default:
10044                       break;
10045                     }
10046                 }
10047               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10048                                   size, i.op[n].disps, pcrel,
10049                                   reloc_type);
10050
10051               if (flag_code == CODE_64BIT && size == 4 && pcrel
10052                   && !i.prefix[ADDR_PREFIX])
10053                 fixP->fx_signed = 1;
10054
10055               /* Check for "call/jmp *mem", "mov mem, %reg",
10056                  "test %reg, mem" and "binop mem, %reg" where binop
10057                  is one of adc, add, and, cmp, or, sbb, sub, xor
10058                  instructions without data prefix.  Always generate
10059                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
10060               if (i.prefix[DATA_PREFIX] == 0
10061                   && (generate_relax_relocations
10062                       || (!object_64bit
10063                           && i.rm.mode == 0
10064                           && i.rm.regmem == 5))
10065                   && (i.rm.mode == 2
10066                       || (i.rm.mode == 0 && i.rm.regmem == 5))
10067                   && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10068                   && ((i.operands == 1
10069                        && i.tm.base_opcode == 0xff
10070                        && (i.rm.reg == 2 || i.rm.reg == 4))
10071                       || (i.operands == 2
10072                           && (i.tm.base_opcode == 0x8b
10073                               || i.tm.base_opcode == 0x85
10074                               || (i.tm.base_opcode & ~0x38) == 0x03))))
10075                 {
10076                   if (object_64bit)
10077                     {
10078                       fixP->fx_tcbit = i.rex != 0;
10079                       if (i.base_reg
10080                           && (i.base_reg->reg_num == RegIP))
10081                       fixP->fx_tcbit2 = 1;
10082                     }
10083                   else
10084                     fixP->fx_tcbit2 = 1;
10085                 }
10086             }
10087         }
10088     }
10089 }
10090
10091 static void
10092 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10093 {
10094   char *p;
10095   unsigned int n;
10096
10097   for (n = 0; n < i.operands; n++)
10098     {
10099       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
10100       if (i.rounding.type != rc_none && n == i.rounding.operand)
10101         continue;
10102
10103       if (operand_type_check (i.types[n], imm))
10104         {
10105           int size = imm_size (n);
10106
10107           if (now_seg == absolute_section)
10108             abs_section_offset += size;
10109           else if (i.op[n].imms->X_op == O_constant)
10110             {
10111               offsetT val;
10112
10113               val = offset_in_range (i.op[n].imms->X_add_number,
10114                                      size);
10115               p = frag_more (size);
10116               md_number_to_chars (p, val, size);
10117             }
10118           else
10119             {
10120               /* Not absolute_section.
10121                  Need a 32-bit fixup (don't support 8bit
10122                  non-absolute imms).  Try to support other
10123                  sizes ...  */
10124               enum bfd_reloc_code_real reloc_type;
10125               int sign;
10126
10127               if (i.types[n].bitfield.imm32s
10128                   && (i.suffix == QWORD_MNEM_SUFFIX
10129                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10130                 sign = 1;
10131               else
10132                 sign = 0;
10133
10134               p = frag_more (size);
10135               reloc_type = reloc (size, 0, sign, i.reloc[n]);
10136
10137               /*   This is tough to explain.  We end up with this one if we
10138                * have operands that look like
10139                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
10140                * obtain the absolute address of the GOT, and it is strongly
10141                * preferable from a performance point of view to avoid using
10142                * a runtime relocation for this.  The actual sequence of
10143                * instructions often look something like:
10144                *
10145                *        call    .L66
10146                * .L66:
10147                *        popl    %ebx
10148                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10149                *
10150                *   The call and pop essentially return the absolute address
10151                * of the label .L66 and store it in %ebx.  The linker itself
10152                * will ultimately change the first operand of the addl so
10153                * that %ebx points to the GOT, but to keep things simple, the
10154                * .o file must have this operand set so that it generates not
10155                * the absolute address of .L66, but the absolute address of
10156                * itself.  This allows the linker itself simply treat a GOTPC
10157                * relocation as asking for a pcrel offset to the GOT to be
10158                * added in, and the addend of the relocation is stored in the
10159                * operand field for the instruction itself.
10160                *
10161                *   Our job here is to fix the operand so that it would add
10162                * the correct offset so that %ebx would point to itself.  The
10163                * thing that is tricky is that .-.L66 will point to the
10164                * beginning of the instruction, so we need to further modify
10165                * the operand so that it will point to itself.  There are
10166                * other cases where you have something like:
10167                *
10168                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10169                *
10170                * and here no correction would be required.  Internally in
10171                * the assembler we treat operands of this form as not being
10172                * pcrel since the '.' is explicitly mentioned, and I wonder
10173                * whether it would simplify matters to do it this way.  Who
10174                * knows.  In earlier versions of the PIC patches, the
10175                * pcrel_adjust field was used to store the correction, but
10176                * since the expression is not pcrel, I felt it would be
10177                * confusing to do it this way.  */
10178
10179               if ((reloc_type == BFD_RELOC_32
10180                    || reloc_type == BFD_RELOC_X86_64_32S
10181                    || reloc_type == BFD_RELOC_64)
10182                   && GOT_symbol
10183                   && GOT_symbol == i.op[n].imms->X_add_symbol
10184                   && (i.op[n].imms->X_op == O_symbol
10185                       || (i.op[n].imms->X_op == O_add
10186                           && ((symbol_get_value_expression
10187                                (i.op[n].imms->X_op_symbol)->X_op)
10188                               == O_subtract))))
10189                 {
10190                   if (!object_64bit)
10191                     reloc_type = BFD_RELOC_386_GOTPC;
10192                   else if (size == 4)
10193                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10194                   else if (size == 8)
10195                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10196                   i.has_gotpc_tls_reloc = true;
10197                   i.op[n].imms->X_add_number +=
10198                     encoding_length (insn_start_frag, insn_start_off, p);
10199                 }
10200               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10201                            i.op[n].imms, 0, reloc_type);
10202             }
10203         }
10204     }
10205 }
10206 \f
10207 /* x86_cons_fix_new is called via the expression parsing code when a
10208    reloc is needed.  We use this hook to get the correct .got reloc.  */
10209 static int cons_sign = -1;
10210
10211 void
10212 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10213                   expressionS *exp, bfd_reloc_code_real_type r)
10214 {
10215   r = reloc (len, 0, cons_sign, r);
10216
10217 #ifdef TE_PE
10218   if (exp->X_op == O_secrel)
10219     {
10220       exp->X_op = O_symbol;
10221       r = BFD_RELOC_32_SECREL;
10222     }
10223 #endif
10224
10225   fix_new_exp (frag, off, len, exp, 0, r);
10226 }
10227
10228 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10229    purpose of the `.dc.a' internal pseudo-op.  */
10230
10231 int
10232 x86_address_bytes (void)
10233 {
10234   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10235     return 4;
10236   return stdoutput->arch_info->bits_per_address / 8;
10237 }
10238
10239 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10240      || defined (LEX_AT)) && !defined (TE_PE)
10241 # define lex_got(reloc, adjust, types) NULL
10242 #else
10243 /* Parse operands of the form
10244    <symbol>@GOTOFF+<nnn>
10245    and similar .plt or .got references.
10246
10247    If we find one, set up the correct relocation in RELOC and copy the
10248    input string, minus the `@GOTOFF' into a malloc'd buffer for
10249    parsing by the calling routine.  Return this buffer, and if ADJUST
10250    is non-null set it to the length of the string we removed from the
10251    input line.  Otherwise return NULL.  */
10252 static char *
10253 lex_got (enum bfd_reloc_code_real *rel,
10254          int *adjust,
10255          i386_operand_type *types)
10256 {
10257   /* Some of the relocations depend on the size of what field is to
10258      be relocated.  But in our callers i386_immediate and i386_displacement
10259      we don't yet know the operand size (this will be set by insn
10260      matching).  Hence we record the word32 relocation here,
10261      and adjust the reloc according to the real size in reloc().  */
10262   static const struct {
10263     const char *str;
10264     int len;
10265     const enum bfd_reloc_code_real rel[2];
10266     const i386_operand_type types64;
10267     bool need_GOT_symbol;
10268   } gotrel[] = {
10269 #ifndef TE_PE
10270 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10271     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
10272                                         BFD_RELOC_SIZE32 },
10273       OPERAND_TYPE_IMM32_64, false },
10274 #endif
10275     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
10276                                        BFD_RELOC_X86_64_PLTOFF64 },
10277       OPERAND_TYPE_IMM64, true },
10278     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
10279                                        BFD_RELOC_X86_64_PLT32    },
10280       OPERAND_TYPE_IMM32_32S_DISP32, false },
10281     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
10282                                        BFD_RELOC_X86_64_GOTPLT64 },
10283       OPERAND_TYPE_IMM64_DISP64, true },
10284     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
10285                                        BFD_RELOC_X86_64_GOTOFF64 },
10286       OPERAND_TYPE_IMM64_DISP64, true },
10287     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10288                                        BFD_RELOC_X86_64_GOTPCREL },
10289       OPERAND_TYPE_IMM32_32S_DISP32, true },
10290     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
10291                                        BFD_RELOC_X86_64_TLSGD    },
10292       OPERAND_TYPE_IMM32_32S_DISP32, true },
10293     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
10294                                        _dummy_first_bfd_reloc_code_real },
10295       OPERAND_TYPE_NONE, true },
10296     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
10297                                        BFD_RELOC_X86_64_TLSLD    },
10298       OPERAND_TYPE_IMM32_32S_DISP32, true },
10299     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10300                                        BFD_RELOC_X86_64_GOTTPOFF },
10301       OPERAND_TYPE_IMM32_32S_DISP32, true },
10302     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
10303                                        BFD_RELOC_X86_64_TPOFF32  },
10304       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10305     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
10306                                        _dummy_first_bfd_reloc_code_real },
10307       OPERAND_TYPE_NONE, true },
10308     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
10309                                        BFD_RELOC_X86_64_DTPOFF32 },
10310       OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10311     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10312                                        _dummy_first_bfd_reloc_code_real },
10313       OPERAND_TYPE_NONE, true },
10314     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10315                                        _dummy_first_bfd_reloc_code_real },
10316       OPERAND_TYPE_NONE, true },
10317     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
10318                                        BFD_RELOC_X86_64_GOT32    },
10319       OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10320     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
10321                                        BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10322       OPERAND_TYPE_IMM32_32S_DISP32, true },
10323     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
10324                                        BFD_RELOC_X86_64_TLSDESC_CALL },
10325       OPERAND_TYPE_IMM32_32S_DISP32, true },
10326 #else /* TE_PE */
10327     { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10328                                        BFD_RELOC_32_SECREL },
10329       OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10330 #endif
10331   };
10332   char *cp;
10333   unsigned int j;
10334
10335 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10336   if (!IS_ELF)
10337     return NULL;
10338 #endif
10339
10340   for (cp = input_line_pointer; *cp != '@'; cp++)
10341     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10342       return NULL;
10343
10344   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10345     {
10346       int len = gotrel[j].len;
10347       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10348         {
10349           if (gotrel[j].rel[object_64bit] != 0)
10350             {
10351               int first, second;
10352               char *tmpbuf, *past_reloc;
10353
10354               *rel = gotrel[j].rel[object_64bit];
10355
10356               if (types)
10357                 {
10358                   if (flag_code != CODE_64BIT)
10359                     {
10360                       types->bitfield.imm32 = 1;
10361                       types->bitfield.disp32 = 1;
10362                     }
10363                   else
10364                     *types = gotrel[j].types64;
10365                 }
10366
10367               if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10368                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10369
10370               /* The length of the first part of our input line.  */
10371               first = cp - input_line_pointer;
10372
10373               /* The second part goes from after the reloc token until
10374                  (and including) an end_of_line char or comma.  */
10375               past_reloc = cp + 1 + len;
10376               cp = past_reloc;
10377               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10378                 ++cp;
10379               second = cp + 1 - past_reloc;
10380
10381               /* Allocate and copy string.  The trailing NUL shouldn't
10382                  be necessary, but be safe.  */
10383               tmpbuf = XNEWVEC (char, first + second + 2);
10384               memcpy (tmpbuf, input_line_pointer, first);
10385               if (second != 0 && *past_reloc != ' ')
10386                 /* Replace the relocation token with ' ', so that
10387                    errors like foo@GOTOFF1 will be detected.  */
10388                 tmpbuf[first++] = ' ';
10389               else
10390                 /* Increment length by 1 if the relocation token is
10391                    removed.  */
10392                 len++;
10393               if (adjust)
10394                 *adjust = len;
10395               memcpy (tmpbuf + first, past_reloc, second);
10396               tmpbuf[first + second] = '\0';
10397               return tmpbuf;
10398             }
10399
10400           as_bad (_("@%s reloc is not supported with %d-bit output format"),
10401                   gotrel[j].str, 1 << (5 + object_64bit));
10402           return NULL;
10403         }
10404     }
10405
10406   /* Might be a symbol version string.  Don't as_bad here.  */
10407   return NULL;
10408 }
10409 #endif
10410
10411 bfd_reloc_code_real_type
10412 x86_cons (expressionS *exp, int size)
10413 {
10414   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10415
10416 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10417       && !defined (LEX_AT)) \
10418     || defined (TE_PE)
10419   intel_syntax = -intel_syntax;
10420
10421   exp->X_md = 0;
10422   if (size == 4 || (object_64bit && size == 8))
10423     {
10424       /* Handle @GOTOFF and the like in an expression.  */
10425       char *save;
10426       char *gotfree_input_line;
10427       int adjust = 0;
10428
10429       save = input_line_pointer;
10430       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10431       if (gotfree_input_line)
10432         input_line_pointer = gotfree_input_line;
10433
10434       expression (exp);
10435
10436       if (gotfree_input_line)
10437         {
10438           /* expression () has merrily parsed up to the end of line,
10439              or a comma - in the wrong buffer.  Transfer how far
10440              input_line_pointer has moved to the right buffer.  */
10441           input_line_pointer = (save
10442                                 + (input_line_pointer - gotfree_input_line)
10443                                 + adjust);
10444           free (gotfree_input_line);
10445           if (exp->X_op == O_constant
10446               || exp->X_op == O_absent
10447               || exp->X_op == O_illegal
10448               || exp->X_op == O_register
10449               || exp->X_op == O_big)
10450             {
10451               char c = *input_line_pointer;
10452               *input_line_pointer = 0;
10453               as_bad (_("missing or invalid expression `%s'"), save);
10454               *input_line_pointer = c;
10455             }
10456           else if ((got_reloc == BFD_RELOC_386_PLT32
10457                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10458                    && exp->X_op != O_symbol)
10459             {
10460               char c = *input_line_pointer;
10461               *input_line_pointer = 0;
10462               as_bad (_("invalid PLT expression `%s'"), save);
10463               *input_line_pointer = c;
10464             }
10465         }
10466     }
10467   else
10468     expression (exp);
10469
10470   intel_syntax = -intel_syntax;
10471
10472   if (intel_syntax)
10473     i386_intel_simplify (exp);
10474 #else
10475   expression (exp);
10476 #endif
10477
10478   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
10479   if (size == 4 && exp->X_op == O_constant && !object_64bit)
10480     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10481
10482   return got_reloc;
10483 }
10484
10485 static void
10486 signed_cons (int size)
10487 {
10488   if (object_64bit)
10489     cons_sign = 1;
10490   cons (size);
10491   cons_sign = -1;
10492 }
10493
10494 #ifdef TE_PE
10495 static void
10496 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10497 {
10498   expressionS exp;
10499
10500   do
10501     {
10502       expression (&exp);
10503       if (exp.X_op == O_symbol)
10504         exp.X_op = O_secrel;
10505
10506       emit_expr (&exp, 4);
10507     }
10508   while (*input_line_pointer++ == ',');
10509
10510   input_line_pointer--;
10511   demand_empty_rest_of_line ();
10512 }
10513 #endif
10514
10515 /* Handle Vector operations.  */
10516
10517 static char *
10518 check_VecOperations (char *op_string)
10519 {
10520   const reg_entry *mask;
10521   const char *saved;
10522   char *end_op;
10523
10524   while (*op_string)
10525     {
10526       saved = op_string;
10527       if (*op_string == '{')
10528         {
10529           op_string++;
10530
10531           /* Check broadcasts.  */
10532           if (startswith (op_string, "1to"))
10533             {
10534               unsigned int bcst_type;
10535
10536               if (i.broadcast.type)
10537                 goto duplicated_vec_op;
10538
10539               op_string += 3;
10540               if (*op_string == '8')
10541                 bcst_type = 8;
10542               else if (*op_string == '4')
10543                 bcst_type = 4;
10544               else if (*op_string == '2')
10545                 bcst_type = 2;
10546               else if (*op_string == '1'
10547                        && *(op_string+1) == '6')
10548                 {
10549                   bcst_type = 16;
10550                   op_string++;
10551                 }
10552               else
10553                 {
10554                   as_bad (_("Unsupported broadcast: `%s'"), saved);
10555                   return NULL;
10556                 }
10557               op_string++;
10558
10559               i.broadcast.type = bcst_type;
10560               i.broadcast.operand = this_operand;
10561             }
10562           /* Check masking operation.  */
10563           else if ((mask = parse_register (op_string, &end_op)) != NULL)
10564             {
10565               if (mask == &bad_reg)
10566                 return NULL;
10567
10568               /* k0 can't be used for write mask.  */
10569               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10570                 {
10571                   as_bad (_("`%s%s' can't be used for write mask"),
10572                           register_prefix, mask->reg_name);
10573                   return NULL;
10574                 }
10575
10576               if (!i.mask.reg)
10577                 {
10578                   i.mask.reg = mask;
10579                   i.mask.operand = this_operand;
10580                 }
10581               else if (i.mask.reg->reg_num)
10582                 goto duplicated_vec_op;
10583               else
10584                 {
10585                   i.mask.reg = mask;
10586
10587                   /* Only "{z}" is allowed here.  No need to check
10588                      zeroing mask explicitly.  */
10589                   if (i.mask.operand != (unsigned int) this_operand)
10590                     {
10591                       as_bad (_("invalid write mask `%s'"), saved);
10592                       return NULL;
10593                     }
10594                 }
10595
10596               op_string = end_op;
10597             }
10598           /* Check zeroing-flag for masking operation.  */
10599           else if (*op_string == 'z')
10600             {
10601               if (!i.mask.reg)
10602                 {
10603                   i.mask.reg = reg_k0;
10604                   i.mask.zeroing = 1;
10605                   i.mask.operand = this_operand;
10606                 }
10607               else
10608                 {
10609                   if (i.mask.zeroing)
10610                     {
10611                     duplicated_vec_op:
10612                       as_bad (_("duplicated `%s'"), saved);
10613                       return NULL;
10614                     }
10615
10616                   i.mask.zeroing = 1;
10617
10618                   /* Only "{%k}" is allowed here.  No need to check mask
10619                      register explicitly.  */
10620                   if (i.mask.operand != (unsigned int) this_operand)
10621                     {
10622                       as_bad (_("invalid zeroing-masking `%s'"),
10623                               saved);
10624                       return NULL;
10625                     }
10626                 }
10627
10628               op_string++;
10629             }
10630           else
10631             goto unknown_vec_op;
10632
10633           if (*op_string != '}')
10634             {
10635               as_bad (_("missing `}' in `%s'"), saved);
10636               return NULL;
10637             }
10638           op_string++;
10639
10640           /* Strip whitespace since the addition of pseudo prefixes
10641              changed how the scrubber treats '{'.  */
10642           if (is_space_char (*op_string))
10643             ++op_string;
10644
10645           continue;
10646         }
10647     unknown_vec_op:
10648       /* We don't know this one.  */
10649       as_bad (_("unknown vector operation: `%s'"), saved);
10650       return NULL;
10651     }
10652
10653   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10654     {
10655       as_bad (_("zeroing-masking only allowed with write mask"));
10656       return NULL;
10657     }
10658
10659   return op_string;
10660 }
10661
10662 static int
10663 i386_immediate (char *imm_start)
10664 {
10665   char *save_input_line_pointer;
10666   char *gotfree_input_line;
10667   segT exp_seg = 0;
10668   expressionS *exp;
10669   i386_operand_type types;
10670
10671   operand_type_set (&types, ~0);
10672
10673   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10674     {
10675       as_bad (_("at most %d immediate operands are allowed"),
10676               MAX_IMMEDIATE_OPERANDS);
10677       return 0;
10678     }
10679
10680   exp = &im_expressions[i.imm_operands++];
10681   i.op[this_operand].imms = exp;
10682
10683   if (is_space_char (*imm_start))
10684     ++imm_start;
10685
10686   save_input_line_pointer = input_line_pointer;
10687   input_line_pointer = imm_start;
10688
10689   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10690   if (gotfree_input_line)
10691     input_line_pointer = gotfree_input_line;
10692
10693   exp_seg = expression (exp);
10694
10695   SKIP_WHITESPACE ();
10696   if (*input_line_pointer)
10697     as_bad (_("junk `%s' after expression"), input_line_pointer);
10698
10699   input_line_pointer = save_input_line_pointer;
10700   if (gotfree_input_line)
10701     {
10702       free (gotfree_input_line);
10703
10704       if (exp->X_op == O_constant)
10705         exp->X_op = O_illegal;
10706     }
10707
10708   if (exp_seg == reg_section)
10709     {
10710       as_bad (_("illegal immediate register operand %s"), imm_start);
10711       return 0;
10712     }
10713
10714   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10715 }
10716
10717 static int
10718 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10719                          i386_operand_type types, const char *imm_start)
10720 {
10721   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10722     {
10723       if (imm_start)
10724         as_bad (_("missing or invalid immediate expression `%s'"),
10725                 imm_start);
10726       return 0;
10727     }
10728   else if (exp->X_op == O_constant)
10729     {
10730       /* Size it properly later.  */
10731       i.types[this_operand].bitfield.imm64 = 1;
10732
10733       /* If not 64bit, sign/zero extend val, to account for wraparound
10734          when !BFD64.  */
10735       if (flag_code != CODE_64BIT)
10736         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10737     }
10738 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10739   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10740            && exp_seg != absolute_section
10741            && exp_seg != text_section
10742            && exp_seg != data_section
10743            && exp_seg != bss_section
10744            && exp_seg != undefined_section
10745            && !bfd_is_com_section (exp_seg))
10746     {
10747       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10748       return 0;
10749     }
10750 #endif
10751   else
10752     {
10753       /* This is an address.  The size of the address will be
10754          determined later, depending on destination register,
10755          suffix, or the default for the section.  */
10756       i.types[this_operand].bitfield.imm8 = 1;
10757       i.types[this_operand].bitfield.imm16 = 1;
10758       i.types[this_operand].bitfield.imm32 = 1;
10759       i.types[this_operand].bitfield.imm32s = 1;
10760       i.types[this_operand].bitfield.imm64 = 1;
10761       i.types[this_operand] = operand_type_and (i.types[this_operand],
10762                                                 types);
10763     }
10764
10765   return 1;
10766 }
10767
10768 static char *
10769 i386_scale (char *scale)
10770 {
10771   offsetT val;
10772   char *save = input_line_pointer;
10773
10774   input_line_pointer = scale;
10775   val = get_absolute_expression ();
10776
10777   switch (val)
10778     {
10779     case 1:
10780       i.log2_scale_factor = 0;
10781       break;
10782     case 2:
10783       i.log2_scale_factor = 1;
10784       break;
10785     case 4:
10786       i.log2_scale_factor = 2;
10787       break;
10788     case 8:
10789       i.log2_scale_factor = 3;
10790       break;
10791     default:
10792       {
10793         char sep = *input_line_pointer;
10794
10795         *input_line_pointer = '\0';
10796         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10797                 scale);
10798         *input_line_pointer = sep;
10799         input_line_pointer = save;
10800         return NULL;
10801       }
10802     }
10803   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10804     {
10805       as_warn (_("scale factor of %d without an index register"),
10806                1 << i.log2_scale_factor);
10807       i.log2_scale_factor = 0;
10808     }
10809   scale = input_line_pointer;
10810   input_line_pointer = save;
10811   return scale;
10812 }
10813
10814 static int
10815 i386_displacement (char *disp_start, char *disp_end)
10816 {
10817   expressionS *exp;
10818   segT exp_seg = 0;
10819   char *save_input_line_pointer;
10820   char *gotfree_input_line;
10821   int override;
10822   i386_operand_type bigdisp, types = anydisp;
10823   int ret;
10824
10825   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10826     {
10827       as_bad (_("at most %d displacement operands are allowed"),
10828               MAX_MEMORY_OPERANDS);
10829       return 0;
10830     }
10831
10832   operand_type_set (&bigdisp, 0);
10833   if (i.jumpabsolute
10834       || i.types[this_operand].bitfield.baseindex
10835       || (current_templates->start->opcode_modifier.jump != JUMP
10836           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10837     {
10838       i386_addressing_mode ();
10839       override = (i.prefix[ADDR_PREFIX] != 0);
10840       if (flag_code == CODE_64BIT)
10841         {
10842           if (!override)
10843             {
10844               bigdisp.bitfield.disp32s = 1;
10845               bigdisp.bitfield.disp64 = 1;
10846             }
10847           else
10848             bigdisp.bitfield.disp32 = 1;
10849         }
10850       else if ((flag_code == CODE_16BIT) ^ override)
10851           bigdisp.bitfield.disp16 = 1;
10852       else
10853           bigdisp.bitfield.disp32 = 1;
10854     }
10855   else
10856     {
10857       /* For PC-relative branches, the width of the displacement may be
10858          dependent upon data size, but is never dependent upon address size.
10859          Also make sure to not unintentionally match against a non-PC-relative
10860          branch template.  */
10861       static templates aux_templates;
10862       const insn_template *t = current_templates->start;
10863       bool has_intel64 = false;
10864
10865       aux_templates.start = t;
10866       while (++t < current_templates->end)
10867         {
10868           if (t->opcode_modifier.jump
10869               != current_templates->start->opcode_modifier.jump)
10870             break;
10871           if ((t->opcode_modifier.isa64 >= INTEL64))
10872             has_intel64 = true;
10873         }
10874       if (t < current_templates->end)
10875         {
10876           aux_templates.end = t;
10877           current_templates = &aux_templates;
10878         }
10879
10880       override = (i.prefix[DATA_PREFIX] != 0);
10881       if (flag_code == CODE_64BIT)
10882         {
10883           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10884               && (!intel64 || !has_intel64))
10885             bigdisp.bitfield.disp16 = 1;
10886           else
10887             bigdisp.bitfield.disp32s = 1;
10888         }
10889       else
10890         {
10891           if (!override)
10892             override = (i.suffix == (flag_code != CODE_16BIT
10893                                      ? WORD_MNEM_SUFFIX
10894                                      : LONG_MNEM_SUFFIX));
10895           bigdisp.bitfield.disp32 = 1;
10896           if ((flag_code == CODE_16BIT) ^ override)
10897             {
10898               bigdisp.bitfield.disp32 = 0;
10899               bigdisp.bitfield.disp16 = 1;
10900             }
10901         }
10902     }
10903   i.types[this_operand] = operand_type_or (i.types[this_operand],
10904                                            bigdisp);
10905
10906   exp = &disp_expressions[i.disp_operands];
10907   i.op[this_operand].disps = exp;
10908   i.disp_operands++;
10909   save_input_line_pointer = input_line_pointer;
10910   input_line_pointer = disp_start;
10911   END_STRING_AND_SAVE (disp_end);
10912
10913 #ifndef GCC_ASM_O_HACK
10914 #define GCC_ASM_O_HACK 0
10915 #endif
10916 #if GCC_ASM_O_HACK
10917   END_STRING_AND_SAVE (disp_end + 1);
10918   if (i.types[this_operand].bitfield.baseIndex
10919       && displacement_string_end[-1] == '+')
10920     {
10921       /* This hack is to avoid a warning when using the "o"
10922          constraint within gcc asm statements.
10923          For instance:
10924
10925          #define _set_tssldt_desc(n,addr,limit,type) \
10926          __asm__ __volatile__ ( \
10927          "movw %w2,%0\n\t" \
10928          "movw %w1,2+%0\n\t" \
10929          "rorl $16,%1\n\t" \
10930          "movb %b1,4+%0\n\t" \
10931          "movb %4,5+%0\n\t" \
10932          "movb $0,6+%0\n\t" \
10933          "movb %h1,7+%0\n\t" \
10934          "rorl $16,%1" \
10935          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
10936
10937          This works great except that the output assembler ends
10938          up looking a bit weird if it turns out that there is
10939          no offset.  You end up producing code that looks like:
10940
10941          #APP
10942          movw $235,(%eax)
10943          movw %dx,2+(%eax)
10944          rorl $16,%edx
10945          movb %dl,4+(%eax)
10946          movb $137,5+(%eax)
10947          movb $0,6+(%eax)
10948          movb %dh,7+(%eax)
10949          rorl $16,%edx
10950          #NO_APP
10951
10952          So here we provide the missing zero.  */
10953
10954       *displacement_string_end = '0';
10955     }
10956 #endif
10957   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10958   if (gotfree_input_line)
10959     input_line_pointer = gotfree_input_line;
10960
10961   exp_seg = expression (exp);
10962
10963   SKIP_WHITESPACE ();
10964   if (*input_line_pointer)
10965     as_bad (_("junk `%s' after expression"), input_line_pointer);
10966 #if GCC_ASM_O_HACK
10967   RESTORE_END_STRING (disp_end + 1);
10968 #endif
10969   input_line_pointer = save_input_line_pointer;
10970   if (gotfree_input_line)
10971     {
10972       free (gotfree_input_line);
10973
10974       if (exp->X_op == O_constant || exp->X_op == O_register)
10975         exp->X_op = O_illegal;
10976     }
10977
10978   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
10979
10980   RESTORE_END_STRING (disp_end);
10981
10982   return ret;
10983 }
10984
10985 static int
10986 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10987                             i386_operand_type types, const char *disp_start)
10988 {
10989   i386_operand_type bigdisp;
10990   int ret = 1;
10991
10992   /* We do this to make sure that the section symbol is in
10993      the symbol table.  We will ultimately change the relocation
10994      to be relative to the beginning of the section.  */
10995   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10996       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10997       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10998     {
10999       if (exp->X_op != O_symbol)
11000         goto inv_disp;
11001
11002       if (S_IS_LOCAL (exp->X_add_symbol)
11003           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11004           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11005         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11006       exp->X_op = O_subtract;
11007       exp->X_op_symbol = GOT_symbol;
11008       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11009         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11010       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11011         i.reloc[this_operand] = BFD_RELOC_64;
11012       else
11013         i.reloc[this_operand] = BFD_RELOC_32;
11014     }
11015
11016   else if (exp->X_op == O_absent
11017            || exp->X_op == O_illegal
11018            || exp->X_op == O_big)
11019     {
11020     inv_disp:
11021       as_bad (_("missing or invalid displacement expression `%s'"),
11022               disp_start);
11023       ret = 0;
11024     }
11025
11026 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11027   else if (exp->X_op != O_constant
11028            && OUTPUT_FLAVOR == bfd_target_aout_flavour
11029            && exp_seg != absolute_section
11030            && exp_seg != text_section
11031            && exp_seg != data_section
11032            && exp_seg != bss_section
11033            && exp_seg != undefined_section
11034            && !bfd_is_com_section (exp_seg))
11035     {
11036       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11037       ret = 0;
11038     }
11039 #endif
11040
11041   if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
11042       /* Constants get taken care of by optimize_disp().  */
11043       && exp->X_op != O_constant)
11044     i.types[this_operand].bitfield.disp8 = 1;
11045
11046   /* Check if this is a displacement only operand.  */
11047   bigdisp = i.types[this_operand];
11048   bigdisp.bitfield.disp8 = 0;
11049   bigdisp.bitfield.disp16 = 0;
11050   bigdisp.bitfield.disp32 = 0;
11051   bigdisp.bitfield.disp32s = 0;
11052   bigdisp.bitfield.disp64 = 0;
11053   if (operand_type_all_zero (&bigdisp))
11054     i.types[this_operand] = operand_type_and (i.types[this_operand],
11055                                               types);
11056
11057   return ret;
11058 }
11059
11060 /* Return the active addressing mode, taking address override and
11061    registers forming the address into consideration.  Update the
11062    address override prefix if necessary.  */
11063
11064 static enum flag_code
11065 i386_addressing_mode (void)
11066 {
11067   enum flag_code addr_mode;
11068
11069   if (i.prefix[ADDR_PREFIX])
11070     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11071   else if (flag_code == CODE_16BIT
11072            && current_templates->start->cpu_flags.bitfield.cpumpx
11073            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11074               from md_assemble() by "is not a valid base/index expression"
11075               when there is a base and/or index.  */
11076            && !i.types[this_operand].bitfield.baseindex)
11077     {
11078       /* MPX insn memory operands with neither base nor index must be forced
11079          to use 32-bit addressing in 16-bit mode.  */
11080       addr_mode = CODE_32BIT;
11081       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11082       ++i.prefixes;
11083       gas_assert (!i.types[this_operand].bitfield.disp16);
11084       gas_assert (!i.types[this_operand].bitfield.disp32);
11085     }
11086   else
11087     {
11088       addr_mode = flag_code;
11089
11090 #if INFER_ADDR_PREFIX
11091       if (i.mem_operands == 0)
11092         {
11093           /* Infer address prefix from the first memory operand.  */
11094           const reg_entry *addr_reg = i.base_reg;
11095
11096           if (addr_reg == NULL)
11097             addr_reg = i.index_reg;
11098
11099           if (addr_reg)
11100             {
11101               if (addr_reg->reg_type.bitfield.dword)
11102                 addr_mode = CODE_32BIT;
11103               else if (flag_code != CODE_64BIT
11104                        && addr_reg->reg_type.bitfield.word)
11105                 addr_mode = CODE_16BIT;
11106
11107               if (addr_mode != flag_code)
11108                 {
11109                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11110                   i.prefixes += 1;
11111                   /* Change the size of any displacement too.  At most one
11112                      of Disp16 or Disp32 is set.
11113                      FIXME.  There doesn't seem to be any real need for
11114                      separate Disp16 and Disp32 flags.  The same goes for
11115                      Imm16 and Imm32.  Removing them would probably clean
11116                      up the code quite a lot.  */
11117                   if (flag_code != CODE_64BIT
11118                       && (i.types[this_operand].bitfield.disp16
11119                           || i.types[this_operand].bitfield.disp32))
11120                     i.types[this_operand]
11121                       = operand_type_xor (i.types[this_operand], disp16_32);
11122                 }
11123             }
11124         }
11125 #endif
11126     }
11127
11128   return addr_mode;
11129 }
11130
/* Make sure the memory operand we've been dealt is valid.

   The operand under inspection is described by i.base_reg, i.index_reg,
   i.log2_scale_factor, i.disp_encoding and i.types[this_operand];
   OPERAND_STRING is its source text and is only used in diagnostics.
   The address size to check against comes from i386_addressing_mode ().

   Memory operands of string insns are compared with the single address
   register they allow.  A mismatch normally only draws a warning and the
   operand is still accepted; it is an error only when a memory operand
   has already been recorded and the base register does not have the
   current address size.  All other memory operands get the generic
   16-bit or 32-/64-bit base/index checks.

   Return 1 on success, 0 on a failure (after as_bad has been called).  */

static int
i386_index_check (const char *operand_string)
{
  const char *kind = "base/index";
  enum flag_code addr_mode = i386_addressing_mode ();
  const insn_template *t = current_templates->start;

  /* Take the string insn path when the first template is a non-PadLock
     string insn and either the last template is a string insn too or a
     memory operand has already been seen.  */
  if (t->opcode_modifier.isstring
      && !t->cpu_flags.bitfield.cpupadlock
      && (current_templates->end[-1].opcode_modifier.isstring
          || i.mem_operands))
    {
      /* Memory operands of string insns are special in that they only allow
         a single register (rDI, rSI, or rBX) as their memory address.  */
      const reg_entry *expected_reg;
      /* Both tables are indexed by addr_mode; di_si additionally by
         0 = rSI, 1 = rDI.
         NOTE(review): the row order has to follow the numeric values of
         enum flag_code -- confirm against its definition.  */
      static const char *di_si[][2] =
        {
          { "esi", "edi" },
          { "si", "di" },
          { "rsi", "rdi" }
        };
      static const char *bx[] = { "ebx", "bx", "rbx" };

      kind = "string address";

      if (t->opcode_modifier.prefixok == PrefixRep)
        {
          /* ES_OP is derived from the last template's isstring value
             relative to IS_STRING_ES_OP0.  OP is chosen as 0 or 1 below;
             rDI is expected when OP equals ES_OP, rSI otherwise.  */
          int es_op = current_templates->end[-1].opcode_modifier.isstring
                      - IS_STRING_ES_OP0;
          int op = 0;

          if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
              || ((!i.mem_operands != !intel_syntax)
                  && current_templates->end[-1].operand_types[1]
                     .bitfield.baseindex))
            op = 1;
          expected_reg
            = (const reg_entry *) str_hash_find (reg_hash,
                                                 di_si[addr_mode][op == es_op]);
        }
      else
        /* String insns whose prefixok is not PrefixRep expect rBX
           (presumably xlat -- confirm against the opcode table).  */
        expected_reg
          = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);

      if (i.base_reg != expected_reg
          || i.index_reg
          || operand_type_check (i.types[this_operand], disp))
        {
          /* The second memory operand must have the same size as
             the first one.  */
          if (i.mem_operands
              && i.base_reg
              && !((addr_mode == CODE_64BIT
                    && i.base_reg->reg_type.bitfield.qword)
                   || (addr_mode == CODE_32BIT
                       ? i.base_reg->reg_type.bitfield.dword
                       : i.base_reg->reg_type.bitfield.word)))
            goto bad_address;

          /* Only a warning: the operand is still accepted.  */
          as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
                   operand_string,
                   intel_syntax ? '[' : '(',
                   register_prefix,
                   expected_reg->reg_name,
                   intel_syntax ? ']' : ')');
          return 1;
        }
      else
        return 1;

      /* Also reached via goto from the generic checks in the else branch
         below; KIND then still reads "base/index".  */
    bad_address:
      as_bad (_("`%s' is not a valid %s expression"),
              operand_string, kind);
      return 0;
    }
  else
    {
      if (addr_mode != CODE_16BIT)
        {
          /* 32-bit/64-bit checks.  */
          if (i.disp_encoding == disp_encoding_16bit)
            {
              /* Also reached via goto from the 16-bit checks, which is
                 why the message still tests addr_mode.  */
            bad_disp:
              as_bad (_("invalid `%s' prefix"),
                      addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
              return 0;
            }

          /* Reject a base register of the wrong size, a RegIP base that is
             combined with an index, and RegIZ used as base.  Also reject an
             index register that is not a vector register and is either of
             the wrong size or lacks the baseindex flag.  */
          if ((i.base_reg
               && ((addr_mode == CODE_64BIT
                    ? !i.base_reg->reg_type.bitfield.qword
                    : !i.base_reg->reg_type.bitfield.dword)
                   || (i.index_reg && i.base_reg->reg_num == RegIP)
                   || i.base_reg->reg_num == RegIZ))
              || (i.index_reg
                  && !i.index_reg->reg_type.bitfield.xmmword
                  && !i.index_reg->reg_type.bitfield.ymmword
                  && !i.index_reg->reg_type.bitfield.zmmword
                  && ((addr_mode == CODE_64BIT
                       ? !i.index_reg->reg_type.bitfield.qword
                       : !i.index_reg->reg_type.bitfield.dword)
                      || !i.index_reg->reg_type.bitfield.baseindex)))
            goto bad_address;

          /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
          if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
               && t->opcode_modifier.opcodespace == SPACE_0F
               && t->base_opcode == 0x1b)
              || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
                  && t->opcode_modifier.opcodespace == SPACE_0F
                  && (t->base_opcode & ~1) == 0x1a)
              || t->opcode_modifier.sib == SIBMEM)
            {
              /* They cannot use RIP-relative addressing. */
              if (i.base_reg && i.base_reg->reg_num == RegIP)
                {
                  as_bad (_("`%s' cannot be used here"), operand_string);
                  return 0;
                }

              /* bndldx and bndstx ignore their scale factor. */
              if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
                  && t->opcode_modifier.opcodespace == SPACE_0F
                  && (t->base_opcode & ~1) == 0x1a
                  && i.log2_scale_factor)
                as_warn (_("register scaling is being ignored here"));
            }
        }
      else
        {
          /* 16-bit checks.  */
          if (i.disp_encoding == disp_encoding_32bit)
            goto bad_disp;

          /* Base and index must both be word registers with the baseindex
             flag.  An index is only accepted together with a base, with
             base reg_num < 6, index reg_num >= 6 and no scaling
             (presumably %bx/%bp plus %si/%di -- confirm against the
             register table).  */
          if ((i.base_reg
               && (!i.base_reg->reg_type.bitfield.word
                   || !i.base_reg->reg_type.bitfield.baseindex))
              || (i.index_reg
                  && (!i.index_reg->reg_type.bitfield.word
                      || !i.index_reg->reg_type.bitfield.baseindex
                      || !(i.base_reg
                           && i.base_reg->reg_num < 6
                           && i.index_reg->reg_num >= 6
                           && i.log2_scale_factor == 0))))
            goto bad_address;
        }
    }
  return 1;
}
11283
11284 /* Handle vector immediates.  */
11285
11286 static int
11287 RC_SAE_immediate (const char *imm_start)
11288 {
11289   unsigned int match_found, j;
11290   const char *pstr = imm_start;
11291   expressionS *exp;
11292
11293   if (*pstr != '{')
11294     return 0;
11295
11296   pstr++;
11297   match_found = 0;
11298   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11299     {
11300       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11301         {
11302           if (i.rounding.type != rc_none)
11303             {
11304               as_bad (_("duplicated `%s'"), imm_start);
11305               return 0;
11306             }
11307
11308           i.rounding.type = RC_NamesTable[j].type;
11309           i.rounding.operand = this_operand;
11310
11311           pstr += RC_NamesTable[j].len;
11312           match_found = 1;
11313           break;
11314         }
11315     }
11316   if (!match_found)
11317     return 0;
11318
11319   if (*pstr++ != '}')
11320     {
11321       as_bad (_("Missing '}': '%s'"), imm_start);
11322       return 0;
11323     }
11324   /* RC/SAE immediate string should contain nothing more.  */;
11325   if (*pstr != 0)
11326     {
11327       as_bad (_("Junk after '}': '%s'"), imm_start);
11328       return 0;
11329     }
11330
11331   exp = &im_expressions[i.imm_operands++];
11332   i.op[this_operand].imms = exp;
11333
11334   exp->X_op = O_constant;
11335   exp->X_add_number = 0;
11336   exp->X_add_symbol = (symbolS *) 0;
11337   exp->X_op_symbol = (symbolS *) 0;
11338
11339   i.types[this_operand].bitfield.imm8 = 1;
11340   return 1;
11341 }
11342
11343 /* Only string instructions can have a second memory operand, so
11344    reduce current_templates to just those if it contains any.  */
11345 static int
11346 maybe_adjust_templates (void)
11347 {
11348   const insn_template *t;
11349
11350   gas_assert (i.mem_operands == 1);
11351
11352   for (t = current_templates->start; t < current_templates->end; ++t)
11353     if (t->opcode_modifier.isstring)
11354       break;
11355
11356   if (t < current_templates->end)
11357     {
11358       static templates aux_templates;
11359       bool recheck;
11360
11361       aux_templates.start = t;
11362       for (; t < current_templates->end; ++t)
11363         if (!t->opcode_modifier.isstring)
11364           break;
11365       aux_templates.end = t;
11366
11367       /* Determine whether to re-check the first memory operand.  */
11368       recheck = (aux_templates.start != current_templates->start
11369                  || t != current_templates->end);
11370
11371       current_templates = &aux_templates;
11372
11373       if (recheck)
11374         {
11375           i.mem_operands = 0;
11376           if (i.memop1_string != NULL
11377               && i386_index_check (i.memop1_string) == 0)
11378             return 0;
11379           i.mem_operands = 1;
11380         }
11381     }
11382
11383   return 1;
11384 }
11385
11386 static INLINE bool starts_memory_operand (char c)
11387 {
11388   return ISDIGIT (c)
11389          || is_identifier_char (c)
11390          || strchr ("([\"+-!~", c);
11391 }
11392
/* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
   on error.

   After an optional ABSOLUTE_PREFIX (which sets i.jumpabsolute) the
   operand is classified, in this order, as
     - a register, optionally followed by vector operations in braces,
       or a segment register followed by ':' which introduces a memory
       reference;
     - an immediate (IMMEDIATE_PREFIX);
     - an RC/SAE pseudo immediate;
     - a memory reference: optional displacement, optional
       (base, index, scale) part, optional trailing vector operations.
   The result is recorded in the i.* fields for operand THIS_OPERAND.

   OPERAND_STRING may be modified: for a "bad register name" diagnostic
   inside the parentheses the next ',' is overwritten with NUL.  */

static int
i386_att_operand (char *operand_string)
{
  const reg_entry *r;
  char *end_op;
  char *op_string = operand_string;

  if (is_space_char (*op_string))
    ++op_string;

  /* We check for an absolute prefix (differentiating,
     for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
  if (*op_string == ABSOLUTE_PREFIX)
    {
      ++op_string;
      if (is_space_char (*op_string))
        ++op_string;
      i.jumpabsolute = true;
    }

  /* Check if operand is a register.  */
  if ((r = parse_register (op_string, &end_op)) != NULL)
    {
      i386_operand_type temp;

      /* parse_register () hands back &bad_reg to signal failure; no
         further diagnostic is issued here.  */
      if (r == &bad_reg)
        return 0;

      /* Check for a segment override by searching for ':' after a
         segment register.  */
      op_string = end_op;
      if (is_space_char (*op_string))
        ++op_string;
      if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
        {
          /* The override belongs to the memory operand that is about to
             be parsed, hence the index by the current count.  */
          i.seg[i.mem_operands] = r;

          /* Skip the ':' and whitespace.  */
          ++op_string;
          if (is_space_char (*op_string))
            ++op_string;

          /* Handle case of %es:*foo.  */
          if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
            {
              ++op_string;
              if (is_space_char (*op_string))
                ++op_string;
              i.jumpabsolute = true;
            }

          if (!starts_memory_operand (*op_string))
            {
              as_bad (_("bad memory operand `%s'"), op_string);
              return 0;
            }
          /* Jumps into the memory reference branch further down, past
             its starts_memory_operand () test.  */
          goto do_memory_reference;
        }

      /* Handle vector operations.  */
      if (*op_string == '{')
        {
          op_string = check_VecOperations (op_string);
          if (op_string == NULL)
            return 0;
        }

      if (*op_string)
        {
          as_bad (_("junk `%s' after register"), op_string);
          return 0;
        }
      /* Record the register's type with the baseindex flag cleared,
         merged into whatever type bits are already set.  */
      temp = r->reg_type;
      temp.bitfield.baseindex = 0;
      i.types[this_operand] = operand_type_or (i.types[this_operand],
                                               temp);
      i.types[this_operand].bitfield.unspecified = 0;
      i.op[this_operand].regs = r;
      i.reg_operands++;
    }
  else if (*op_string == REGISTER_PREFIX)
    {
      as_bad (_("bad register name `%s'"), op_string);
      return 0;
    }
  else if (*op_string == IMMEDIATE_PREFIX)
    {
      ++op_string;
      if (i.jumpabsolute)
        {
          as_bad (_("immediate operand illegal with absolute jump"));
          return 0;
        }
      if (!i386_immediate (op_string))
        return 0;
    }
  /* Note this is handed the unstripped OPERAND_STRING, not op_string.  */
  else if (RC_SAE_immediate (operand_string))
    {
      /* If it is a RC or SAE immediate, do nothing.  */
      ;
    }
  else if (starts_memory_operand (*op_string))
    {
      /* This is a memory reference of some sort.  */
      char *base_string;

      /* Start and end of displacement string expression (if found).  */
      char *displacement_string_start;
      char *displacement_string_end;

    do_memory_reference:
      /* On the second memory operand give maybe_adjust_templates () the
         chance to narrow current_templates to string insns.  */
      if (i.mem_operands == 1 && !maybe_adjust_templates ())
        return 0;
      if ((i.mem_operands == 1
           && !current_templates->start->opcode_modifier.isstring)
          || i.mem_operands == 2)
        {
          as_bad (_("too many memory references for `%s'"),
                  current_templates->start->name);
          return 0;
        }

      /* Check for base index form.  We detect the base index form by
         looking for an ')' at the end of the operand, searching
         for the '(' matching it, and finding a REGISTER_PREFIX or ','
         after the '('.  */
      base_string = op_string + strlen (op_string);

      /* Handle vector operations.  */
      /* NOTE(review): with an empty op_string this would step to the
         character in front of it; assumes op_string is never empty
         here -- confirm against starts_memory_operand () and callers.  */
      --base_string;
      if (is_space_char (*base_string))
        --base_string;

      if (*base_string == '}')
        {
          char *vop_start = NULL;

          /* Walk backwards over one or more trailing {...} groups.
             VOP_START ends up at the '{' of the leftmost group, or NULL
             if no opening brace was found.  */
          while (base_string-- > op_string)
            {
              if (*base_string == '"')
                break;
              if (*base_string != '{')
                continue;

              vop_start = base_string;

              --base_string;
              if (is_space_char (*base_string))
                --base_string;

              if (*base_string != '}')
                break;

              vop_start = NULL;
            }

          if (!vop_start)
            {
              as_bad (_("unbalanced figure braces"));
              return 0;
            }

          if (check_VecOperations (vop_start) == NULL)
            return 0;
        }

      /* If we only have a displacement, set-up for it to be parsed later.  */
      displacement_string_start = op_string;
      displacement_string_end = base_string + 1;

      if (*base_string == ')')
        {
          char *temp_string;

          /* We've already checked that the number of left & right ()'s are
             equal, so this loop will not be infinite.  */
          do
            {
              base_string--;
            }
          while (*base_string != '(' && *base_string != ')'
                 && *base_string != '"');

          /* Remember where the scan stopped: if this turns out to be a
             base/index form the displacement ends here.  */
          temp_string = base_string;

          /* Skip past '(' and whitespace.  */
          if (*base_string == '(')
            ++base_string;
          if (is_space_char (*base_string))
            ++base_string;

          /* A leading ',' means there is no base register; otherwise a
             register must parse for this to be the base/index form.  */
          if (*base_string == ','
              || ((i.base_reg = parse_register (base_string, &end_op))
                  != NULL))
            {
              displacement_string_end = temp_string;

              i.types[this_operand].bitfield.baseindex = 1;

              if (i.base_reg)
                {
                  if (i.base_reg == &bad_reg)
                    return 0;
                  base_string = end_op;
                  if (is_space_char (*base_string))
                    ++base_string;
                }

              /* There may be an index reg or scale factor here.  */
              if (*base_string == ',')
                {
                  ++base_string;
                  if (is_space_char (*base_string))
                    ++base_string;

                  if ((i.index_reg = parse_register (base_string, &end_op))
                      != NULL)
                    {
                      if (i.index_reg == &bad_reg)
                        return 0;
                      base_string = end_op;
                      if (is_space_char (*base_string))
                        ++base_string;
                      if (*base_string == ',')
                        {
                          ++base_string;
                          if (is_space_char (*base_string))
                            ++base_string;
                        }
                      else if (*base_string != ')')
                        {
                          as_bad (_("expecting `,' or `)' "
                                    "after index register in `%s'"),
                                  operand_string);
                          return 0;
                        }
                    }
                  else if (*base_string == REGISTER_PREFIX)
                    {
                      /* Trim the diagnostic at the next ','.  This
                         writes into the caller's string.  */
                      end_op = strchr (base_string, ',');
                      if (end_op)
                        *end_op = '\0';
                      as_bad (_("bad register name `%s'"), base_string);
                      return 0;
                    }

                  /* Check for scale factor.  */
                  if (*base_string != ')')
                    {
                      char *end_scale = i386_scale (base_string);

                      if (!end_scale)
                        return 0;

                      base_string = end_scale;
                      if (is_space_char (*base_string))
                        ++base_string;
                      if (*base_string != ')')
                        {
                          as_bad (_("expecting `)' "
                                    "after scale factor in `%s'"),
                                  operand_string);
                          return 0;
                        }
                    }
                  else if (!i.index_reg)
                    {
                      as_bad (_("expecting index register or scale factor "
                                "after `,'; got '%c'"),
                              *base_string);
                      return 0;
                    }
                }
              else if (*base_string != ')')
                {
                  as_bad (_("expecting `,' or `)' "
                            "after base register in `%s'"),
                          operand_string);
                  return 0;
                }
            }
          else if (*base_string == REGISTER_PREFIX)
            {
              /* Trim the diagnostic at the next ','.  This writes into
                 the caller's string.  */
              end_op = strchr (base_string, ',');
              if (end_op)
                *end_op = '\0';
              as_bad (_("bad register name `%s'"), base_string);
              return 0;
            }
        }

      /* If there's an expression beginning the operand, parse it,
         assuming displacement_string_start and
         displacement_string_end are meaningful.  */
      if (displacement_string_start != displacement_string_end)
        {
          if (!i386_displacement (displacement_string_start,
                                  displacement_string_end))
            return 0;
        }

      /* Special case for (%dx) while doing input/output op.  */
      /* The operand then takes the base register's type and is not
         counted or flagged as a memory operand.  */
      if (i.base_reg
          && i.base_reg->reg_type.bitfield.instance == RegD
          && i.base_reg->reg_type.bitfield.word
          && i.index_reg == 0
          && i.log2_scale_factor == 0
          && i.seg[i.mem_operands] == 0
          && !operand_type_check (i.types[this_operand], disp))
        {
          i.types[this_operand] = i.base_reg->reg_type;
          return 1;
        }

      if (i386_index_check (operand_string) == 0)
        return 0;
      i.flags[this_operand] |= Operand_Mem;
      /* Keep a copy of the first memory operand's text;
         maybe_adjust_templates () passes it to i386_index_check ()
         again.  The copy is not freed here -- presumably released
         elsewhere, confirm.  */
      if (i.mem_operands == 0)
        i.memop1_string = xstrdup (operand_string);
      i.mem_operands++;
    }
  else
    {
      /* It's not a memory operand; argh!  */
      as_bad (_("invalid char %s beginning operand %d `%s'"),
              output_invalid (*op_string),
              this_operand + 1,
              op_string);
      return 0;
    }
  return 1;                     /* Normal return.  */
}
11728 \f
11729 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11730    that an rs_machine_dependent frag may reach.  */
11731
11732 unsigned int
11733 i386_frag_max_var (fragS *frag)
11734 {
11735   /* The only relaxable frags are for jumps.
11736      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11737   gas_assert (frag->fr_type == rs_machine_dependent);
11738   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11739 }
11740
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Return 1 if a reference to FR_SYMBOL can be resolved within its
   segment, 0 if it may bind elsewhere.  FR_VAR is either NO_RELOC or
   one of the PLT32 relocation codes; any other value aborts.  */
static int
elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
{
  /* STT_GNU_IFUNC symbols always have to go through the PLT.  */
  if ((symbol_get_bfdsym (fr_symbol)->flags
       & BSF_GNU_INDIRECT_FUNCTION) != 0)
    return 0;

  /* A non-external symbol is either local or weak; only the former is
     known to resolve here.  */
  if (!S_IS_EXTERNAL (fr_symbol))
    return S_IS_WEAK (fr_symbol) ? 0 : 1;

  /* A global symbol with non-default visibility cannot be preempted.  */
  if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
    return 1;

  if (fr_var != NO_RELOC)
    {
      enum bfd_reloc_code_real reloc = (enum bfd_reloc_code_real) fr_var;

      if (reloc != BFD_RELOC_386_PLT32
          && reloc != BFD_RELOC_X86_64_PLT32)
        abort ();

      /* A symbol referenced through a PLT relocation may be
         preempted.  */
      return 0;
    }

  /* In a shared library a default-visibility global may be preempted
     by another definition.  */
  return !shared;
}
#endif
11774
11775 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11776    Note also work for Skylake and Cascadelake.
11777 ---------------------------------------------------------------------
11778 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11779 | ------  | ----------- | ------- | -------- |
11780 |   Jo    |      N      |    N    |     Y    |
11781 |   Jno   |      N      |    N    |     Y    |
11782 |  Jc/Jb  |      Y      |    N    |     Y    |
11783 | Jae/Jnb |      Y      |    N    |     Y    |
11784 |  Je/Jz  |      Y      |    Y    |     Y    |
11785 | Jne/Jnz |      Y      |    Y    |     Y    |
11786 | Jna/Jbe |      Y      |    N    |     Y    |
11787 | Ja/Jnbe |      Y      |    N    |     Y    |
11788 |   Js    |      N      |    N    |     Y    |
11789 |   Jns   |      N      |    N    |     Y    |
11790 |  Jp/Jpe |      N      |    N    |     Y    |
11791 | Jnp/Jpo |      N      |    N    |     Y    |
11792 | Jl/Jnge |      Y      |    Y    |     Y    |
11793 | Jge/Jnl |      Y      |    Y    |     Y    |
11794 | Jle/Jng |      Y      |    Y    |     Y    |
11795 | Jg/Jnle |      Y      |    Y    |     Y    |
11796 ---------------------------------------------------------------------  */
11797 static int
11798 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11799 {
11800   if (mf_cmp == mf_cmp_alu_cmp)
11801     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11802             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11803   if (mf_cmp == mf_cmp_incdec)
11804     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11805             || mf_jcc == mf_jcc_jle);
11806   if (mf_cmp == mf_cmp_test_and)
11807     return 1;
11808   return 0;
11809 }
11810
11811 /* Return the next non-empty frag.  */
11812
11813 static fragS *
11814 i386_next_non_empty_frag (fragS *fragP)
11815 {
11816   /* There may be a frag with a ".fill 0" when there is no room in
11817      the current frag for frag_grow in output_insn.  */
11818   for (fragP = fragP->fr_next;
11819        (fragP != NULL
11820         && fragP->fr_type == rs_fill
11821         && fragP->fr_fix == 0);
11822        fragP = fragP->fr_next)
11823     ;
11824   return fragP;
11825 }
11826
11827 /* Return the next jcc frag after BRANCH_PADDING.  */
11828
11829 static fragS *
11830 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11831 {
11832   fragS *branch_fragP;
11833   if (!pad_fragP)
11834     return NULL;
11835
11836   if (pad_fragP->fr_type == rs_machine_dependent
11837       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11838           == BRANCH_PADDING))
11839     {
11840       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11841       if (branch_fragP->fr_type != rs_machine_dependent)
11842         return NULL;
11843       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11844           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11845                                    pad_fragP->tc_frag_data.mf_type))
11846         return branch_fragP;
11847     }
11848
11849   return NULL;
11850 }
11851
11852 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11853
11854 static void
11855 i386_classify_machine_dependent_frag (fragS *fragP)
11856 {
11857   fragS *cmp_fragP;
11858   fragS *pad_fragP;
11859   fragS *branch_fragP;
11860   fragS *next_fragP;
11861   unsigned int max_prefix_length;
11862
11863   if (fragP->tc_frag_data.classified)
11864     return;
11865
11866   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11867      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11868   for (next_fragP = fragP;
11869        next_fragP != NULL;
11870        next_fragP = next_fragP->fr_next)
11871     {
11872       next_fragP->tc_frag_data.classified = 1;
11873       if (next_fragP->fr_type == rs_machine_dependent)
11874         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11875           {
11876           case BRANCH_PADDING:
11877             /* The BRANCH_PADDING frag must be followed by a branch
11878                frag.  */
11879             branch_fragP = i386_next_non_empty_frag (next_fragP);
11880             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11881             break;
11882           case FUSED_JCC_PADDING:
11883             /* Check if this is a fused jcc:
11884                FUSED_JCC_PADDING
11885                CMP like instruction
11886                BRANCH_PADDING
11887                COND_JUMP
11888                */
11889             cmp_fragP = i386_next_non_empty_frag (next_fragP);
11890             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11891             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11892             if (branch_fragP)
11893               {
11894                 /* The BRANCH_PADDING frag is merged with the
11895                    FUSED_JCC_PADDING frag.  */
11896                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11897                 /* CMP like instruction size.  */
11898                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11899                 frag_wane (pad_fragP);
11900                 /* Skip to branch_fragP.  */
11901                 next_fragP = branch_fragP;
11902               }
11903             else if (next_fragP->tc_frag_data.max_prefix_length)
11904               {
11905                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11906                    a fused jcc.  */
11907                 next_fragP->fr_subtype
11908                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11909                 next_fragP->tc_frag_data.max_bytes
11910                   = next_fragP->tc_frag_data.max_prefix_length;
11911                 /* This will be updated in the BRANCH_PREFIX scan.  */
11912                 next_fragP->tc_frag_data.max_prefix_length = 0;
11913               }
11914             else
11915               frag_wane (next_fragP);
11916             break;
11917           }
11918     }
11919
11920   /* Stop if there is no BRANCH_PREFIX.  */
11921   if (!align_branch_prefix_size)
11922     return;
11923
11924   /* Scan for BRANCH_PREFIX.  */
11925   for (; fragP != NULL; fragP = fragP->fr_next)
11926     {
11927       if (fragP->fr_type != rs_machine_dependent
11928           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11929               != BRANCH_PREFIX))
11930         continue;
11931
11932       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
11933          COND_JUMP_PREFIX.  */
11934       max_prefix_length = 0;
11935       for (next_fragP = fragP;
11936            next_fragP != NULL;
11937            next_fragP = next_fragP->fr_next)
11938         {
11939           if (next_fragP->fr_type == rs_fill)
11940             /* Skip rs_fill frags.  */
11941             continue;
11942           else if (next_fragP->fr_type != rs_machine_dependent)
11943             /* Stop for all other frags.  */
11944             break;
11945
11946           /* rs_machine_dependent frags.  */
11947           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11948               == BRANCH_PREFIX)
11949             {
11950               /* Count BRANCH_PREFIX frags.  */
11951               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
11952                 {
11953                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
11954                   frag_wane (next_fragP);
11955                 }
11956               else
11957                 max_prefix_length
11958                   += next_fragP->tc_frag_data.max_bytes;
11959             }
11960           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11961                     == BRANCH_PADDING)
11962                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11963                        == FUSED_JCC_PADDING))
11964             {
11965               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
11966               fragP->tc_frag_data.u.padding_fragP = next_fragP;
11967               break;
11968             }
11969           else
11970             /* Stop for other rs_machine_dependent frags.  */
11971             break;
11972         }
11973
11974       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11975
11976       /* Skip to the next frag.  */
11977       fragP = next_fragP;
11978     }
11979 }
11980
11981 /* Compute padding size for
11982
11983         FUSED_JCC_PADDING
11984         CMP like instruction
11985         BRANCH_PADDING
11986         COND_JUMP/UNCOND_JUMP
11987
11988    or
11989
11990         BRANCH_PADDING
11991         COND_JUMP/UNCOND_JUMP
11992  */
11993
11994 static int
11995 i386_branch_padding_size (fragS *fragP, offsetT address)
11996 {
11997   unsigned int offset, size, padding_size;
11998   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11999
12000   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
12001   if (!address)
12002     address = fragP->fr_address;
12003   address += fragP->fr_fix;
12004
12005   /* CMP like instrunction size.  */
12006   size = fragP->tc_frag_data.cmp_size;
12007
12008   /* The base size of the branch frag.  */
12009   size += branch_fragP->fr_fix;
12010
12011   /* Add opcode and displacement bytes for the rs_machine_dependent
12012      branch frag.  */
12013   if (branch_fragP->fr_type == rs_machine_dependent)
12014     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12015
12016   /* Check if branch is within boundary and doesn't end at the last
12017      byte.  */
12018   offset = address & ((1U << align_branch_power) - 1);
12019   if ((offset + size) >= (1U << align_branch_power))
12020     /* Padding needed to avoid crossing boundary.  */
12021     padding_size = (1U << align_branch_power) - offset;
12022   else
12023     /* No padding needed.  */
12024     padding_size = 0;
12025
12026   /* The return value may be saved in tc_frag_data.length which is
12027      unsigned byte.  */
12028   if (!fits_in_unsigned_byte (padding_size))
12029     abort ();
12030
12031   return padding_size;
12032 }
12033
12034 /* i386_generic_table_relax_frag()
12035
12036    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12037    grow/shrink padding to align branch frags.  Hand others to
12038    relax_frag().

         SEGMENT and STRETCH are passed through to relax_frag() for frags
         that are not handled here.  Apart from that, STRETCH is only used
         when the distance from a BRANCH_PREFIX frag to its padding frag
         has not been recorded yet.

         Returns the growth of FRAGP for this pass: the newly computed
         padding size minus tc_frag_data.length for the two padding frag
         types, and tc_frag_data.length minus tc_frag_data.last_length for
         BRANCH_PREFIX frags.  */
12039
12040 long
12041 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12042 {
12043   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12044       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12045     {
            /* Padding frags: recompute the padding at the frag's current
               address and report the difference from the recorded size.  */
12046       long padding_size = i386_branch_padding_size (fragP, 0);
12047       long grow = padding_size - fragP->tc_frag_data.length;
12048
12049       /* When the BRANCH_PREFIX frag is used, the computed address
12050          must match the actual address and there should be no padding.
               A nonzero padding_address is the address a BRANCH_PREFIX
               frag expected this frag to end up at.  */
12051       if (fragP->tc_frag_data.padding_address
12052           && (fragP->tc_frag_data.padding_address != fragP->fr_address
12053               || padding_size))
12054         abort ();
12055
12056       /* Update the padding size.  */
12057       if (grow)
12058         fragP->tc_frag_data.length = padding_size;
12059
12060       return grow;
12061     }
12062   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12063     {
12064       fragS *padding_fragP, *next_fragP;
12065       long padding_size, left_size, last_size;
12066
12067       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12068       if (!padding_fragP)
12069         /* Use the padding set by the leading BRANCH_PREFIX frag.
                 This frag's length and last_length were filled in by the
                 loops below when that leading frag was processed.  */
12070         return (fragP->tc_frag_data.length
12071                 - fragP->tc_frag_data.last_length);
12072
12073       /* Compute the relative address of the padding frag the very
12074         first time, when the BRANCH_PREFIX frag sizes are zero.  The
              subtraction of STRETCH undoes the adjustment already applied
              to this frag's address in the current pass.  */
12075       if (!fragP->tc_frag_data.padding_address)
12076         fragP->tc_frag_data.padding_address
12077           = padding_fragP->fr_address - (fragP->fr_address - stretch);
12078
12079       /* First update the last length from the previous iteration.
               The previous prefix_length is handed out to the BRANCH_PREFIX
               frags in order, each taking at most its max_bytes.  A frag
               whose max_bytes is zero keeps its old last_length while
               bytes remain to be handed out.  */
12080       left_size = fragP->tc_frag_data.prefix_length;
12081       for (next_fragP = fragP;
12082            next_fragP != padding_fragP;
12083            next_fragP = next_fragP->fr_next)
12084         if (next_fragP->fr_type == rs_machine_dependent
12085             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12086                 == BRANCH_PREFIX))
12087           {
12088             if (left_size)
12089               {
12090                 int max = next_fragP->tc_frag_data.max_bytes;
12091                 if (max)
12092                   {
12093                     int size;
12094                     if (max > left_size)
12095                       size = left_size;
12096                     else
12097                       size = max;
12098                     left_size -= size;
12099                     next_fragP->tc_frag_data.last_length = size;
12100                   }
12101               }
12102             else
12103               next_fragP->tc_frag_data.last_length = 0;
12104           }
12105
12106       /* Check the padding size for the padding frag, as if it started
               at this frag's address plus the recorded relative address,
               i.e. with no prefix bytes inserted in between.  */
12107       padding_size = i386_branch_padding_size
12108         (padding_fragP, (fragP->fr_address
12109                          + fragP->tc_frag_data.padding_address));
12110
12111       last_size = fragP->tc_frag_data.prefix_length;
12112       /* Check if there is change from the last iteration.  */
12113       if (padding_size == last_size)
12114         {
12115           /* Update the expected address of the padding frag.  */
12116           padding_fragP->tc_frag_data.padding_address
12117             = (fragP->fr_address + padding_size
12118                + fragP->tc_frag_data.padding_address);
12119           return 0;
12120         }
12121
12122       if (padding_size > fragP->tc_frag_data.max_prefix_length)
12123         {
12124           /* No padding if there is no sufficient room.  Clear the
12125              expected address of the padding frag.  */
12126           padding_fragP->tc_frag_data.padding_address = 0;
12127           padding_size = 0;
12128         }
12129       else
12130         /* Store the expected address of the padding frag.  */
12131         padding_fragP->tc_frag_data.padding_address
12132           = (fragP->fr_address + padding_size
12133              + fragP->tc_frag_data.padding_address);
12134
12135       fragP->tc_frag_data.prefix_length = padding_size;
12136
12137       /* Update the length for the current iteration, using the same
               distribution scheme as for last_length above.  */
12138       left_size = padding_size;
12139       for (next_fragP = fragP;
12140            next_fragP != padding_fragP;
12141            next_fragP = next_fragP->fr_next)
12142         if (next_fragP->fr_type == rs_machine_dependent
12143             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12144                 == BRANCH_PREFIX))
12145           {
12146             if (left_size)
12147               {
12148                 int max = next_fragP->tc_frag_data.max_bytes;
12149                 if (max)
12150                   {
12151                     int size;
12152                     if (max > left_size)
12153                       size = left_size;
12154                     else
12155                       size = max;
12156                     left_size -= size;
12157                     next_fragP->tc_frag_data.length = size;
12158                   }
12159               }
12160             else
12161               next_fragP->tc_frag_data.length = 0;
12162           }
12163
            /* Growth of this frag alone; the other BRANCH_PREFIX frags
               report theirs through the early return above.  */
12164       return (fragP->tc_frag_data.length
12165               - fragP->tc_frag_data.last_length);
12166     }
        /* Everything else is relaxed by the generic code.  */
12167   return relax_frag (segment, fragP, stretch);
12168 }
12169
12170 /* md_estimate_size_before_relax()
12171
12172    Called just before relax() for rs_machine_dependent frags.  The x86
12173    assembler uses these frags to handle variable size jump
12174    instructions.
12175
12176    Any symbol that is now undefined will not become defined.
12177    Return the correct fr_subtype in the frag.
12178    Return the initial "guess for variable size of frag" to caller.
12179    The guess is actually the growth beyond the fixed part.  Whatever
12180    we do to grow the fixed or variable part contributes to our
12181    returned value.  */
12182
12183 int
12184 md_estimate_size_before_relax (fragS *fragP, segT segment)
12185 {
12186   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12187       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12188       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12189     {
12190       i386_classify_machine_dependent_frag (fragP);
12191       return fragP->tc_frag_data.length;
12192     }
12193
12194   /* We've already got fragP->fr_subtype right;  all we have to do is
12195      check for un-relaxable symbols.  On an ELF system, we can't relax
12196      an externally visible symbol, because it may be overridden by a
12197      shared library.  */
12198   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12199 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12200       || (IS_ELF
12201           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12202                                                 fragP->fr_var))
12203 #endif
12204 #if defined (OBJ_COFF) && defined (TE_PE)
12205       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12206           && S_IS_WEAK (fragP->fr_symbol))
12207 #endif
12208       )
12209     {
12210       /* Symbol is undefined in this segment, or we need to keep a
12211          reloc so that weak symbols can be overridden.  */
12212       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12213       enum bfd_reloc_code_real reloc_type;
12214       unsigned char *opcode;
12215       int old_fr_fix;
12216       fixS *fixP = NULL;
12217
12218       if (fragP->fr_var != NO_RELOC)
12219         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12220       else if (size == 2)
12221         reloc_type = BFD_RELOC_16_PCREL;
12222 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12223       else if (need_plt32_p (fragP->fr_symbol))
12224         reloc_type = BFD_RELOC_X86_64_PLT32;
12225 #endif
12226       else
12227         reloc_type = BFD_RELOC_32_PCREL;
12228
12229       old_fr_fix = fragP->fr_fix;
12230       opcode = (unsigned char *) fragP->fr_opcode;
12231
12232       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12233         {
12234         case UNCOND_JUMP:
12235           /* Make jmp (0xeb) a (d)word displacement jump.  */
12236           opcode[0] = 0xe9;
12237           fragP->fr_fix += size;
12238           fixP = fix_new (fragP, old_fr_fix, size,
12239                           fragP->fr_symbol,
12240                           fragP->fr_offset, 1,
12241                           reloc_type);
12242           break;
12243
12244         case COND_JUMP86:
12245           if (size == 2
12246               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12247             {
12248               /* Negate the condition, and branch past an
12249                  unconditional jump.  */
12250               opcode[0] ^= 1;
12251               opcode[1] = 3;
12252               /* Insert an unconditional jump.  */
12253               opcode[2] = 0xe9;
12254               /* We added two extra opcode bytes, and have a two byte
12255                  offset.  */
12256               fragP->fr_fix += 2 + 2;
12257               fix_new (fragP, old_fr_fix + 2, 2,
12258                        fragP->fr_symbol,
12259                        fragP->fr_offset, 1,
12260                        reloc_type);
12261               break;
12262             }
12263           /* Fall through.  */
12264
12265         case COND_JUMP:
12266           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12267             {
12268               fragP->fr_fix += 1;
12269               fixP = fix_new (fragP, old_fr_fix, 1,
12270                               fragP->fr_symbol,
12271                               fragP->fr_offset, 1,
12272                               BFD_RELOC_8_PCREL);
12273               fixP->fx_signed = 1;
12274               break;
12275             }
12276
12277           /* This changes the byte-displacement jump 0x7N
12278              to the (d)word-displacement jump 0x0f,0x8N.  */
12279           opcode[1] = opcode[0] + 0x10;
12280           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12281           /* We've added an opcode byte.  */
12282           fragP->fr_fix += 1 + size;
12283           fixP = fix_new (fragP, old_fr_fix + 1, size,
12284                           fragP->fr_symbol,
12285                           fragP->fr_offset, 1,
12286                           reloc_type);
12287           break;
12288
12289         default:
12290           BAD_CASE (fragP->fr_subtype);
12291           break;
12292         }
12293
12294       /* All jumps handled here are signed, but don't unconditionally use a
12295          signed limit check for 32 and 16 bit jumps as we want to allow wrap
12296          around at 4G (outside of 64-bit mode) and 64k.  */
12297       if (size == 4 && flag_code == CODE_64BIT)
12298         fixP->fx_signed = 1;
12299
12300       frag_wane (fragP);
12301       return fragP->fr_fix - old_fr_fix;
12302     }
12303
12304   /* Guess size depending on current relax state.  Initially the relax
12305      state will correspond to a short jump and we return 1, because
12306      the variable part of the frag (the branch offset) is one byte
12307      long.  However, we can relax a section more than once and in that
12308      case we must either set fr_subtype back to the unrelaxed state,
12309      or return the value for the appropriate branch.  */
12310   return md_relax_table[fragP->fr_subtype].rlx_length;
12311 }
12312
12313 /* Called after relax() is finished.
12314
12315    In:  Address of frag.
12316         fr_type == rs_machine_dependent.
12317         fr_subtype is what the address relaxed to.
12318
12319    Out: Any fixSs and constants are set up.
12320         Caller will turn frag into a ".space 0".  */
12321
12322 void
12323 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12324                  fragS *fragP)
12325 {
12326   unsigned char *opcode;
12327   unsigned char *where_to_put_displacement = NULL;
12328   offsetT target_address;
12329   offsetT opcode_address;
12330   unsigned int extension = 0;
12331   offsetT displacement_from_opcode_start;
12332
12333   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12334       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12335       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12336     {
12337       /* Generate nop padding.  */
12338       unsigned int size = fragP->tc_frag_data.length;
12339       if (size)
12340         {
12341           if (size > fragP->tc_frag_data.max_bytes)
12342             abort ();
12343
12344           if (flag_debug)
12345             {
12346               const char *msg;
12347               const char *branch = "branch";
12348               const char *prefix = "";
12349               fragS *padding_fragP;
12350               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12351                   == BRANCH_PREFIX)
12352                 {
12353                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12354                   switch (fragP->tc_frag_data.default_prefix)
12355                     {
12356                     default:
12357                       abort ();
12358                       break;
12359                     case CS_PREFIX_OPCODE:
12360                       prefix = " cs";
12361                       break;
12362                     case DS_PREFIX_OPCODE:
12363                       prefix = " ds";
12364                       break;
12365                     case ES_PREFIX_OPCODE:
12366                       prefix = " es";
12367                       break;
12368                     case FS_PREFIX_OPCODE:
12369                       prefix = " fs";
12370                       break;
12371                     case GS_PREFIX_OPCODE:
12372                       prefix = " gs";
12373                       break;
12374                     case SS_PREFIX_OPCODE:
12375                       prefix = " ss";
12376                       break;
12377                     }
12378                   if (padding_fragP)
12379                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12380                             "%s within %d-byte boundary\n");
12381                   else
12382                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12383                             "align %s within %d-byte boundary\n");
12384                 }
12385               else
12386                 {
12387                   padding_fragP = fragP;
12388                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12389                           "%s within %d-byte boundary\n");
12390                 }
12391
12392               if (padding_fragP)
12393                 switch (padding_fragP->tc_frag_data.branch_type)
12394                   {
12395                   case align_branch_jcc:
12396                     branch = "jcc";
12397                     break;
12398                   case align_branch_fused:
12399                     branch = "fused jcc";
12400                     break;
12401                   case align_branch_jmp:
12402                     branch = "jmp";
12403                     break;
12404                   case align_branch_call:
12405                     branch = "call";
12406                     break;
12407                   case align_branch_indirect:
12408                     branch = "indiret branch";
12409                     break;
12410                   case align_branch_ret:
12411                     branch = "ret";
12412                     break;
12413                   default:
12414                     break;
12415                   }
12416
12417               fprintf (stdout, msg,
12418                        fragP->fr_file, fragP->fr_line, size, prefix,
12419                        (long long) fragP->fr_address, branch,
12420                        1 << align_branch_power);
12421             }
12422           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12423             memset (fragP->fr_opcode,
12424                     fragP->tc_frag_data.default_prefix, size);
12425           else
12426             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12427                                 size, 0);
12428           fragP->fr_fix += size;
12429         }
12430       return;
12431     }
12432
12433   opcode = (unsigned char *) fragP->fr_opcode;
12434
12435   /* Address we want to reach in file space.  */
12436   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12437
12438   /* Address opcode resides at in file space.  */
12439   opcode_address = fragP->fr_address + fragP->fr_fix;
12440
12441   /* Displacement from opcode start to fill into instruction.  */
12442   displacement_from_opcode_start = target_address - opcode_address;
12443
12444   if ((fragP->fr_subtype & BIG) == 0)
12445     {
12446       /* Don't have to change opcode.  */
12447       extension = 1;            /* 1 opcode + 1 displacement  */
12448       where_to_put_displacement = &opcode[1];
12449     }
12450   else
12451     {
12452       if (no_cond_jump_promotion
12453           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12454         as_warn_where (fragP->fr_file, fragP->fr_line,
12455                        _("long jump required"));
12456
12457       switch (fragP->fr_subtype)
12458         {
12459         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12460           extension = 4;                /* 1 opcode + 4 displacement  */
12461           opcode[0] = 0xe9;
12462           where_to_put_displacement = &opcode[1];
12463           break;
12464
12465         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12466           extension = 2;                /* 1 opcode + 2 displacement  */
12467           opcode[0] = 0xe9;
12468           where_to_put_displacement = &opcode[1];
12469           break;
12470
12471         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12472         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12473           extension = 5;                /* 2 opcode + 4 displacement  */
12474           opcode[1] = opcode[0] + 0x10;
12475           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12476           where_to_put_displacement = &opcode[2];
12477           break;
12478
12479         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12480           extension = 3;                /* 2 opcode + 2 displacement  */
12481           opcode[1] = opcode[0] + 0x10;
12482           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12483           where_to_put_displacement = &opcode[2];
12484           break;
12485
12486         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12487           extension = 4;
12488           opcode[0] ^= 1;
12489           opcode[1] = 3;
12490           opcode[2] = 0xe9;
12491           where_to_put_displacement = &opcode[3];
12492           break;
12493
12494         default:
12495           BAD_CASE (fragP->fr_subtype);
12496           break;
12497         }
12498     }
12499
12500   /* If size if less then four we are sure that the operand fits,
12501      but if it's 4, then it could be that the displacement is larger
12502      then -/+ 2GB.  */
12503   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12504       && object_64bit
12505       && ((addressT) (displacement_from_opcode_start - extension
12506                       + ((addressT) 1 << 31))
12507           > (((addressT) 2 << 31) - 1)))
12508     {
12509       as_bad_where (fragP->fr_file, fragP->fr_line,
12510                     _("jump target out of range"));
12511       /* Make us emit 0.  */
12512       displacement_from_opcode_start = extension;
12513     }
12514   /* Now put displacement after opcode.  */
12515   md_number_to_chars ((char *) where_to_put_displacement,
12516                       (valueT) (displacement_from_opcode_start - extension),
12517                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12518   fragP->fr_fix += extension;
12519 }
12520 \f
12521 /* Apply a fixup (fixP) to segment data, once it has been determined
12522    by our caller that we have all the info we need to fix it up.
12523
12524    Parameter valP is the pointer to the value of the bits.
12525
12526    On the 386, immediates, displacements, and data pointers are all in
12527    the same (little-endian) format, so we don't need to care about which
12528    we are handling.

         Unless TE_Mach is defined, the value is first adjusted for the
         relocation type and object format, and the adjusted value is
         written back through valP.  The final value is stored into the
         frag at fixP->fx_where using fixP->fx_size bytes.  When the fixup
         is kept as a RELA relocation (or, on PE, refers to a weak symbol),
         zero is stored in the frag and the value is kept in
         fixP->fx_addnumber instead.  fixP->fx_done is set when the fixup
         has no symbol.

         On ELF, TLS descriptor call and vtable relocations against a
         symbol return early with fx_done cleared; nothing is stored in
         the frag and *valP is left untouched for them.  */
12529
12530 void
12531 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12532 {
        /* Location in the frag that receives the fixed-up bytes.  */
12533   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12534   valueT value = *valP;
12535
12536 #if !defined (TE_Mach)
        /* A pc-relative fixup that still carries an absolute relocation
           type is switched to the pc-relative type of the same width.  */
12537   if (fixP->fx_pcrel)
12538     {
12539       switch (fixP->fx_r_type)
12540         {
12541         default:
12542           break;
12543
12544         case BFD_RELOC_64:
12545           fixP->fx_r_type = BFD_RELOC_64_PCREL;
12546           break;
12547         case BFD_RELOC_32:
12548         case BFD_RELOC_X86_64_32S:
12549           fixP->fx_r_type = BFD_RELOC_32_PCREL;
12550           break;
12551         case BFD_RELOC_16:
12552           fixP->fx_r_type = BFD_RELOC_16_PCREL;
12553           break;
12554         case BFD_RELOC_8:
12555           fixP->fx_r_type = BFD_RELOC_8_PCREL;
12556           break;
12557         }
12558     }
12559
12560   if (fixP->fx_addsy != NULL
12561       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12562           || fixP->fx_r_type == BFD_RELOC_64_PCREL
12563           || fixP->fx_r_type == BFD_RELOC_16_PCREL
12564           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12565       && !use_rela_relocations)
12566     {
12567       /* This is a hack.  There should be a better way to handle this.
12568          This covers for the fact that bfd_install_relocation will
12569          subtract the current location (for partial_inplace, PC relative
12570          relocations); see more below.  */
12571 #ifndef OBJ_AOUT
12572       if (IS_ELF
12573 #ifdef TE_PE
12574           || OUTPUT_FLAVOR == bfd_target_coff_flavour
12575 #endif
12576           )
12577         value += fixP->fx_where + fixP->fx_frag->fr_address;
12578 #endif
12579 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12580       if (IS_ELF)
12581         {
12582           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12583
12584           if ((sym_seg == seg
12585                || (symbol_section_p (fixP->fx_addsy)
12586                    && sym_seg != absolute_section))
12587               && !generic_force_reloc (fixP))
12588             {
12589               /* Yes, we add the values in twice.  This is because
12590                  bfd_install_relocation subtracts them out again.  I think
12591                  bfd_install_relocation is broken, but I don't dare change
12592                  it.  FIXME.  */
12593               value += fixP->fx_where + fixP->fx_frag->fr_address;
12594             }
12595         }
12596 #endif
12597 #if defined (OBJ_COFF) && defined (TE_PE)
12598       /* For some reason, the PE format does not store a
12599          section address offset for a PC relative symbol.  */
12600       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12601           || S_IS_WEAK (fixP->fx_addsy))
12602         value += md_pcrel_from (fixP);
12603 #endif
12604     }
12605 #if defined (OBJ_COFF) && defined (TE_PE)
12606   if (fixP->fx_addsy != NULL
12607       && S_IS_WEAK (fixP->fx_addsy)
12608       /* PR 16858: Do not modify weak function references.  */
12609       && ! fixP->fx_pcrel)
12610     {
12611 #if !defined (TE_PEP)
12612       /* For x86 PE weak function symbols are neither PC-relative
12613          nor do they set S_IS_FUNCTION.  So the only reliable way
12614          to detect them is to check the flags of their containing
12615          section.  */
12616       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12617           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12618         ;
12619       else
12620 #endif
            /* Under !TE_PEP this statement is the else branch of the test
               above; otherwise it is executed unconditionally.  */
12621       value -= S_GET_VALUE (fixP->fx_addsy);
12622     }
12623 #endif
12624
12625   /* Fix a few things - the dynamic linker expects certain values here,
12626      and we must not disappoint it.  */
12627 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12628   if (IS_ELF && fixP->fx_addsy)
12629     switch (fixP->fx_r_type)
12630       {
12631       case BFD_RELOC_386_PLT32:
12632       case BFD_RELOC_X86_64_PLT32:
12633         /* Make the jump instruction point to the address of the operand.
12634            At runtime we merely add the offset to the actual PLT entry.
12635            NB: Subtract the offset size only for jump instructions.  */
12636         if (fixP->fx_pcrel)
12637           value = -4;
12638         break;
12639
12640       case BFD_RELOC_386_TLS_GD:
12641       case BFD_RELOC_386_TLS_LDM:
12642       case BFD_RELOC_386_TLS_IE_32:
12643       case BFD_RELOC_386_TLS_IE:
12644       case BFD_RELOC_386_TLS_GOTIE:
12645       case BFD_RELOC_386_TLS_GOTDESC:
12646       case BFD_RELOC_X86_64_TLSGD:
12647       case BFD_RELOC_X86_64_TLSLD:
12648       case BFD_RELOC_X86_64_GOTTPOFF:
12649       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12650         value = 0; /* Fully resolved at runtime.  No addend.  */
12651         /* Fallthrough */
12652       case BFD_RELOC_386_TLS_LE:
12653       case BFD_RELOC_386_TLS_LDO_32:
12654       case BFD_RELOC_386_TLS_LE_32:
12655       case BFD_RELOC_X86_64_DTPOFF32:
12656       case BFD_RELOC_X86_64_DTPOFF64:
12657       case BFD_RELOC_X86_64_TPOFF32:
12658       case BFD_RELOC_X86_64_TPOFF64:
              /* Every TLS relocation marks its symbol as thread-local.  */
12659         S_SET_THREAD_LOCAL (fixP->fx_addsy);
12660         break;
12661
12662       case BFD_RELOC_386_TLS_DESC_CALL:
12663       case BFD_RELOC_X86_64_TLSDESC_CALL:
12664         value = 0; /* Fully resolved at runtime.  No addend.  */
12665         S_SET_THREAD_LOCAL (fixP->fx_addsy);
              /* Early return: nothing is stored in the frag.  */
12666         fixP->fx_done = 0;
12667         return;
12668
12669       case BFD_RELOC_VTABLE_INHERIT:
12670       case BFD_RELOC_VTABLE_ENTRY:
              /* Early return: nothing is stored in the frag.  */
12671         fixP->fx_done = 0;
12672         return;
12673
12674       default:
12675         break;
12676       }
12677 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
12678
12679   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
12680   if (!object_64bit)
12681     value = extend_to_32bit_address (value);
12682
        /* Hand the adjusted value back to the caller.  */
12683   *valP = value;
12684 #endif /* !defined (TE_Mach)  */
12685
12686   /* Are we finished with this relocation now?  */
12687   if (fixP->fx_addsy == NULL)
12688     {
12689       fixP->fx_done = 1;
12690       switch (fixP->fx_r_type)
12691         {
12692         case BFD_RELOC_X86_64_32S:
12693           fixP->fx_signed = 1;
12694           break;
12695
12696         default:
12697           break;
12698         }
12699     }
12700 #if defined (OBJ_COFF) && defined (TE_PE)
12701   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12702     {
12703       fixP->fx_done = 0;
12704       /* Remember value for tc_gen_reloc.  */
12705       fixP->fx_addnumber = value;
12706       /* Clear out the frag for now.  */
12707       value = 0;
12708     }
12709 #endif
12710   else if (use_rela_relocations)
12711     {
12712       fixP->fx_no_overflow = 1;
12713       /* Remember value for tc_gen_reloc.  */
12714       fixP->fx_addnumber = value;
            /* The frag itself gets zero.  */
12715       value = 0;
12716     }
12717
        /* Store the (possibly zeroed) value into the frag.  */
12718   md_number_to_chars (p, value, fixP->fx_size);
12719 }
12720 \f
/* Thin wrapper around ieee_md_atof(): TYPE, LITP and SIZEP are
   forwarded unchanged and the result of ieee_md_atof() is returned
   as is.  */

const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* NOTE(review): an older comment here spoke of LITTLENUMs being
     output in reverse order for the "bigendian 386".  The x86 is
     little-endian; this flag presumably selects the matching
     LITTLENUM order -- confirm against ieee_md_atof().  */
  const bool order_flag = false;

  return ieee_md_atof (type, litP, sizeP, order_flag);
}
12728 \f
/* Scratch buffer shared by all calls of output_invalid(); big enough
   for the longer of its two formats, "(0x%x)" applied to an unsigned
   char, including the terminating NUL.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Format character C for use in a diagnostic: quoted ('c') when it is
   printable, as a hexadecimal byte value ((0xNN)) otherwise.  The
   returned pointer refers to a static buffer that the next call
   overwrites.  */

static char *
output_invalid (int c)
{
  char *const text = output_invalid_buf;
  const size_t room = sizeof (output_invalid_buf);

  if (!ISPRINT (c))
    {
      snprintf (text, room, "(0x%x)", (unsigned char) c);
      return text;
    }

  snprintf (text, room, "'%c'", c);
  return text;
}
12742
12743 /* Verify that @r can be used in the current context.  */
12744
12745 static bool check_register (const reg_entry *r)
12746 {
12747   if (allow_pseudo_reg)
12748     return true;
12749
12750   if (operand_type_all_zero (&r->reg_type))
12751     return false;
12752
12753   if ((r->reg_type.bitfield.dword
12754        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12755        || r->reg_type.bitfield.class == RegCR
12756        || r->reg_type.bitfield.class == RegDR)
12757       && !cpu_arch_flags.bitfield.cpui386)
12758     return false;
12759
12760   if (r->reg_type.bitfield.class == RegTR
12761       && (flag_code == CODE_64BIT
12762           || !cpu_arch_flags.bitfield.cpui386
12763           || cpu_arch_isa_flags.bitfield.cpui586
12764           || cpu_arch_isa_flags.bitfield.cpui686))
12765     return false;
12766
12767   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12768     return false;
12769
12770   if (!cpu_arch_flags.bitfield.cpuavx512f)
12771     {
12772       if (r->reg_type.bitfield.zmmword
12773           || r->reg_type.bitfield.class == RegMask)
12774         return false;
12775
12776       if (!cpu_arch_flags.bitfield.cpuavx)
12777         {
12778           if (r->reg_type.bitfield.ymmword)
12779             return false;
12780
12781           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12782             return false;
12783         }
12784     }
12785
12786   if (r->reg_type.bitfield.tmmword
12787       && (!cpu_arch_flags.bitfield.cpuamx_tile
12788           || flag_code != CODE_64BIT))
12789     return false;
12790
12791   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12792     return false;
12793
12794   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12795   if (!allow_index_reg && r->reg_num == RegIZ)
12796     return false;
12797
12798   /* Upper 16 vector registers are only available with VREX in 64bit
12799      mode, and require EVEX encoding.  */
12800   if (r->reg_flags & RegVRex)
12801     {
12802       if (!cpu_arch_flags.bitfield.cpuavx512f
12803           || flag_code != CODE_64BIT)
12804         return false;
12805
12806       if (i.vec_encoding == vex_encoding_default)
12807         i.vec_encoding = vex_encoding_evex;
12808       else if (i.vec_encoding != vex_encoding_evex)
12809         i.vec_encoding = vex_encoding_error;
12810     }
12811
12812   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12813       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12814       && flag_code != CODE_64BIT)
12815     return false;
12816
12817   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12818       && !intel_syntax)
12819     return false;
12820
12821   return true;
12822 }
12823
12824 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12825
12826 static const reg_entry *
12827 parse_real_register (char *reg_string, char **end_op)
12828 {
12829   char *s = reg_string;
12830   char *p;
12831   char reg_name_given[MAX_REG_NAME_SIZE + 1];
12832   const reg_entry *r;
12833
12834   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
12835   if (*s == REGISTER_PREFIX)
12836     ++s;
12837
12838   if (is_space_char (*s))
12839     ++s;
12840
12841   p = reg_name_given;
12842   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12843     {
12844       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12845         return (const reg_entry *) NULL;
12846       s++;
12847     }
12848
12849   /* For naked regs, make sure that we are not dealing with an identifier.
12850      This prevents confusing an identifier like `eax_var' with register
12851      `eax'.  */
12852   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12853     return (const reg_entry *) NULL;
12854
12855   *end_op = s;
12856
12857   r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
12858
12859   /* Handle floating point regs, allowing spaces in the (i) part.  */
12860   if (r == reg_st0)
12861     {
12862       if (!cpu_arch_flags.bitfield.cpu8087
12863           && !cpu_arch_flags.bitfield.cpu287
12864           && !cpu_arch_flags.bitfield.cpu387
12865           && !allow_pseudo_reg)
12866         return (const reg_entry *) NULL;
12867
12868       if (is_space_char (*s))
12869         ++s;
12870       if (*s == '(')
12871         {
12872           ++s;
12873           if (is_space_char (*s))
12874             ++s;
12875           if (*s >= '0' && *s <= '7')
12876             {
12877               int fpr = *s - '0';
12878               ++s;
12879               if (is_space_char (*s))
12880                 ++s;
12881               if (*s == ')')
12882                 {
12883                   *end_op = s + 1;
12884                   know (r[fpr].reg_num == fpr);
12885                   return r + fpr;
12886                 }
12887             }
12888           /* We have "%st(" then garbage.  */
12889           return (const reg_entry *) NULL;
12890         }
12891     }
12892
12893   return r && check_register (r) ? r : NULL;
12894 }
12895
12896 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12897
12898 static const reg_entry *
12899 parse_register (char *reg_string, char **end_op)
12900 {
12901   const reg_entry *r;
12902
12903   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12904     r = parse_real_register (reg_string, end_op);
12905   else
12906     r = NULL;
12907   if (!r)
12908     {
12909       char *save = input_line_pointer;
12910       char c;
12911       symbolS *symbolP;
12912
12913       input_line_pointer = reg_string;
12914       c = get_symbol_name (&reg_string);
12915       symbolP = symbol_find (reg_string);
12916       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
12917         {
12918           const expressionS *e = symbol_get_value_expression (symbolP);
12919
12920           know (e->X_op == O_register);
12921           know (e->X_add_number >= 0
12922                 && (valueT) e->X_add_number < i386_regtab_size);
12923           r = i386_regtab + e->X_add_number;
12924           if (!check_register (r))
12925             {
12926               as_bad (_("register '%s%s' cannot be used here"),
12927                       register_prefix, r->reg_name);
12928               r = &bad_reg;
12929             }
12930           *end_op = input_line_pointer;
12931         }
12932       *input_line_pointer = c;
12933       input_line_pointer = save;
12934     }
12935   return r;
12936 }
12937
12938 int
12939 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12940 {
12941   const reg_entry *r;
12942   char *end = input_line_pointer;
12943
12944   *end = *nextcharP;
12945   r = parse_register (name, &input_line_pointer);
12946   if (r && end <= input_line_pointer)
12947     {
12948       *nextcharP = *input_line_pointer;
12949       *input_line_pointer = 0;
12950       if (r != &bad_reg)
12951         {
12952           e->X_op = O_register;
12953           e->X_add_number = r - i386_regtab;
12954         }
12955       else
12956           e->X_op = O_illegal;
12957       return 1;
12958     }
12959   input_line_pointer = end;
12960   *end = 0;
12961   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12962 }
12963
12964 void
12965 md_operand (expressionS *e)
12966 {
12967   char *end;
12968   const reg_entry *r;
12969
12970   switch (*input_line_pointer)
12971     {
12972     case REGISTER_PREFIX:
12973       r = parse_real_register (input_line_pointer, &end);
12974       if (r)
12975         {
12976           e->X_op = O_register;
12977           e->X_add_number = r - i386_regtab;
12978           input_line_pointer = end;
12979         }
12980       break;
12981
12982     case '[':
12983       gas_assert (intel_syntax);
12984       end = input_line_pointer++;
12985       expression (e);
12986       if (*input_line_pointer == ']')
12987         {
12988           ++input_line_pointer;
12989           e->X_op_symbol = make_expr_symbol (e);
12990           e->X_add_symbol = NULL;
12991           e->X_add_number = 0;
12992           e->X_op = O_index;
12993         }
12994       else
12995         {
12996           e->X_op = O_absent;
12997           input_line_pointer = end;
12998         }
12999       break;
13000     }
13001 }
13002
13003 \f
13004 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13005 const char *md_shortopts = "kVQ:sqnO::";
13006 #else
13007 const char *md_shortopts = "qnO::";
13008 #endif
13009
13010 #define OPTION_32 (OPTION_MD_BASE + 0)
13011 #define OPTION_64 (OPTION_MD_BASE + 1)
13012 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13013 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13014 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13015 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13016 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13017 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13018 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13019 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13020 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13021 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13022 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13023 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13024 #define OPTION_X32 (OPTION_MD_BASE + 14)
13025 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13026 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13027 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13028 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13029 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13030 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13031 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13032 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13033 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13034 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13035 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13036 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13037 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13038 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13039 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13040 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13041 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13042 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13043 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13044
13045 struct option md_longopts[] =
13046 {
13047   {"32", no_argument, NULL, OPTION_32},
13048 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13049      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13050   {"64", no_argument, NULL, OPTION_64},
13051 #endif
13052 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13053   {"x32", no_argument, NULL, OPTION_X32},
13054   {"mshared", no_argument, NULL, OPTION_MSHARED},
13055   {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13056 #endif
13057   {"divide", no_argument, NULL, OPTION_DIVIDE},
13058   {"march", required_argument, NULL, OPTION_MARCH},
13059   {"mtune", required_argument, NULL, OPTION_MTUNE},
13060   {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13061   {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13062   {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13063   {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13064   {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13065   {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13066   {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13067   {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13068   {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13069   {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13070   {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13071   {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13072 # if defined (TE_PE) || defined (TE_PEP)
13073   {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13074 #endif
13075   {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13076   {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13077   {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13078   {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13079   {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13080   {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13081   {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13082   {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13083   {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13084   {"mlfence-before-indirect-branch", required_argument, NULL,
13085    OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13086   {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13087   {"mamd64", no_argument, NULL, OPTION_MAMD64},
13088   {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13089   {NULL, no_argument, NULL, 0}
13090 };
13091 size_t md_longopts_size = sizeof (md_longopts);
13092
13093 int
13094 md_parse_option (int c, const char *arg)
13095 {
13096   unsigned int j;
13097   char *arch, *next, *saved, *type;
13098
13099   switch (c)
13100     {
13101     case 'n':
13102       optimize_align_code = 0;
13103       break;
13104
13105     case 'q':
13106       quiet_warnings = 1;
13107       break;
13108
13109 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13110       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13111          should be emitted or not.  FIXME: Not implemented.  */
13112     case 'Q':
13113       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13114         return 0;
13115       break;
13116
13117       /* -V: SVR4 argument to print version ID.  */
13118     case 'V':
13119       print_version_id ();
13120       break;
13121
13122       /* -k: Ignore for FreeBSD compatibility.  */
13123     case 'k':
13124       break;
13125
13126     case 's':
13127       /* -s: On i386 Solaris, this tells the native assembler to use
13128          .stab instead of .stab.excl.  We always use .stab anyhow.  */
13129       break;
13130
13131     case OPTION_MSHARED:
13132       shared = 1;
13133       break;
13134
13135     case OPTION_X86_USED_NOTE:
13136       if (strcasecmp (arg, "yes") == 0)
13137         x86_used_note = 1;
13138       else if (strcasecmp (arg, "no") == 0)
13139         x86_used_note = 0;
13140       else
13141         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13142       break;
13143
13144
13145 #endif
13146 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13147      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13148     case OPTION_64:
13149       {
13150         const char **list, **l;
13151
13152         list = bfd_target_list ();
13153         for (l = list; *l != NULL; l++)
13154           if (startswith (*l, "elf64-x86-64")
13155               || strcmp (*l, "coff-x86-64") == 0
13156               || strcmp (*l, "pe-x86-64") == 0
13157               || strcmp (*l, "pei-x86-64") == 0
13158               || strcmp (*l, "mach-o-x86-64") == 0)
13159             {
13160               default_arch = "x86_64";
13161               break;
13162             }
13163         if (*l == NULL)
13164           as_fatal (_("no compiled in support for x86_64"));
13165         free (list);
13166       }
13167       break;
13168 #endif
13169
13170 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13171     case OPTION_X32:
13172       if (IS_ELF)
13173         {
13174           const char **list, **l;
13175
13176           list = bfd_target_list ();
13177           for (l = list; *l != NULL; l++)
13178             if (startswith (*l, "elf32-x86-64"))
13179               {
13180                 default_arch = "x86_64:32";
13181                 break;
13182               }
13183           if (*l == NULL)
13184             as_fatal (_("no compiled in support for 32bit x86_64"));
13185           free (list);
13186         }
13187       else
13188         as_fatal (_("32bit x86_64 is only supported for ELF"));
13189       break;
13190 #endif
13191
13192     case OPTION_32:
13193       default_arch = "i386";
13194       break;
13195
13196     case OPTION_DIVIDE:
13197 #ifdef SVR4_COMMENT_CHARS
13198       {
13199         char *n, *t;
13200         const char *s;
13201
13202         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13203         t = n;
13204         for (s = i386_comment_chars; *s != '\0'; s++)
13205           if (*s != '/')
13206             *t++ = *s;
13207         *t = '\0';
13208         i386_comment_chars = n;
13209       }
13210 #endif
13211       break;
13212
13213     case OPTION_MARCH:
13214       saved = xstrdup (arg);
13215       arch = saved;
13216       /* Allow -march=+nosse.  */
13217       if (*arch == '+')
13218         arch++;
13219       do
13220         {
13221           if (*arch == '.')
13222             as_fatal (_("invalid -march= option: `%s'"), arg);
13223           next = strchr (arch, '+');
13224           if (next)
13225             *next++ = '\0';
13226           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13227             {
13228               if (strcmp (arch, cpu_arch [j].name) == 0)
13229                 {
13230                   /* Processor.  */
13231                   if (! cpu_arch[j].flags.bitfield.cpui386)
13232                     continue;
13233
13234                   cpu_arch_name = cpu_arch[j].name;
13235                   cpu_sub_arch_name = NULL;
13236                   cpu_arch_flags = cpu_arch[j].flags;
13237                   cpu_arch_isa = cpu_arch[j].type;
13238                   cpu_arch_isa_flags = cpu_arch[j].flags;
13239                   if (!cpu_arch_tune_set)
13240                     {
13241                       cpu_arch_tune = cpu_arch_isa;
13242                       cpu_arch_tune_flags = cpu_arch_isa_flags;
13243                     }
13244                   break;
13245                 }
13246               else if (*cpu_arch [j].name == '.'
13247                        && strcmp (arch, cpu_arch [j].name + 1) == 0)
13248                 {
13249                   /* ISA extension.  */
13250                   i386_cpu_flags flags;
13251
13252                   flags = cpu_flags_or (cpu_arch_flags,
13253                                         cpu_arch[j].flags);
13254
13255                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13256                     {
13257                       if (cpu_sub_arch_name)
13258                         {
13259                           char *name = cpu_sub_arch_name;
13260                           cpu_sub_arch_name = concat (name,
13261                                                       cpu_arch[j].name,
13262                                                       (const char *) NULL);
13263                           free (name);
13264                         }
13265                       else
13266                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
13267                       cpu_arch_flags = flags;
13268                       cpu_arch_isa_flags = flags;
13269                     }
13270                   else
13271                     cpu_arch_isa_flags
13272                       = cpu_flags_or (cpu_arch_isa_flags,
13273                                       cpu_arch[j].flags);
13274                   break;
13275                 }
13276             }
13277
13278           if (j >= ARRAY_SIZE (cpu_arch))
13279             {
13280               /* Disable an ISA extension.  */
13281               for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13282                 if (strcmp (arch, cpu_noarch [j].name) == 0)
13283                   {
13284                     i386_cpu_flags flags;
13285
13286                     flags = cpu_flags_and_not (cpu_arch_flags,
13287                                                cpu_noarch[j].flags);
13288                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13289                       {
13290                         if (cpu_sub_arch_name)
13291                           {
13292                             char *name = cpu_sub_arch_name;
13293                             cpu_sub_arch_name = concat (arch,
13294                                                         (const char *) NULL);
13295                             free (name);
13296                           }
13297                         else
13298                           cpu_sub_arch_name = xstrdup (arch);
13299                         cpu_arch_flags = flags;
13300                         cpu_arch_isa_flags = flags;
13301                       }
13302                     break;
13303                   }
13304
13305               if (j >= ARRAY_SIZE (cpu_noarch))
13306                 j = ARRAY_SIZE (cpu_arch);
13307             }
13308
13309           if (j >= ARRAY_SIZE (cpu_arch))
13310             as_fatal (_("invalid -march= option: `%s'"), arg);
13311
13312           arch = next;
13313         }
13314       while (next != NULL);
13315       free (saved);
13316       break;
13317
13318     case OPTION_MTUNE:
13319       if (*arg == '.')
13320         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13321       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13322         {
13323           if (strcmp (arg, cpu_arch [j].name) == 0)
13324             {
13325               cpu_arch_tune_set = 1;
13326               cpu_arch_tune = cpu_arch [j].type;
13327               cpu_arch_tune_flags = cpu_arch[j].flags;
13328               break;
13329             }
13330         }
13331       if (j >= ARRAY_SIZE (cpu_arch))
13332         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13333       break;
13334
13335     case OPTION_MMNEMONIC:
13336       if (strcasecmp (arg, "att") == 0)
13337         intel_mnemonic = 0;
13338       else if (strcasecmp (arg, "intel") == 0)
13339         intel_mnemonic = 1;
13340       else
13341         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13342       break;
13343
13344     case OPTION_MSYNTAX:
13345       if (strcasecmp (arg, "att") == 0)
13346         intel_syntax = 0;
13347       else if (strcasecmp (arg, "intel") == 0)
13348         intel_syntax = 1;
13349       else
13350         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13351       break;
13352
13353     case OPTION_MINDEX_REG:
13354       allow_index_reg = 1;
13355       break;
13356
13357     case OPTION_MNAKED_REG:
13358       allow_naked_reg = 1;
13359       break;
13360
13361     case OPTION_MSSE2AVX:
13362       sse2avx = 1;
13363       break;
13364
13365     case OPTION_MSSE_CHECK:
13366       if (strcasecmp (arg, "error") == 0)
13367         sse_check = check_error;
13368       else if (strcasecmp (arg, "warning") == 0)
13369         sse_check = check_warning;
13370       else if (strcasecmp (arg, "none") == 0)
13371         sse_check = check_none;
13372       else
13373         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13374       break;
13375
13376     case OPTION_MOPERAND_CHECK:
13377       if (strcasecmp (arg, "error") == 0)
13378         operand_check = check_error;
13379       else if (strcasecmp (arg, "warning") == 0)
13380         operand_check = check_warning;
13381       else if (strcasecmp (arg, "none") == 0)
13382         operand_check = check_none;
13383       else
13384         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13385       break;
13386
13387     case OPTION_MAVXSCALAR:
13388       if (strcasecmp (arg, "128") == 0)
13389         avxscalar = vex128;
13390       else if (strcasecmp (arg, "256") == 0)
13391         avxscalar = vex256;
13392       else
13393         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13394       break;
13395
13396     case OPTION_MVEXWIG:
13397       if (strcmp (arg, "0") == 0)
13398         vexwig = vexw0;
13399       else if (strcmp (arg, "1") == 0)
13400         vexwig = vexw1;
13401       else
13402         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13403       break;
13404
13405     case OPTION_MADD_BND_PREFIX:
13406       add_bnd_prefix = 1;
13407       break;
13408
13409     case OPTION_MEVEXLIG:
13410       if (strcmp (arg, "128") == 0)
13411         evexlig = evexl128;
13412       else if (strcmp (arg, "256") == 0)
13413         evexlig = evexl256;
13414       else  if (strcmp (arg, "512") == 0)
13415         evexlig = evexl512;
13416       else
13417         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13418       break;
13419
13420     case OPTION_MEVEXRCIG:
13421       if (strcmp (arg, "rne") == 0)
13422         evexrcig = rne;
13423       else if (strcmp (arg, "rd") == 0)
13424         evexrcig = rd;
13425       else if (strcmp (arg, "ru") == 0)
13426         evexrcig = ru;
13427       else if (strcmp (arg, "rz") == 0)
13428         evexrcig = rz;
13429       else
13430         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13431       break;
13432
13433     case OPTION_MEVEXWIG:
13434       if (strcmp (arg, "0") == 0)
13435         evexwig = evexw0;
13436       else if (strcmp (arg, "1") == 0)
13437         evexwig = evexw1;
13438       else
13439         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13440       break;
13441
13442 # if defined (TE_PE) || defined (TE_PEP)
13443     case OPTION_MBIG_OBJ:
13444       use_big_obj = 1;
13445       break;
13446 #endif
13447
13448     case OPTION_MOMIT_LOCK_PREFIX:
13449       if (strcasecmp (arg, "yes") == 0)
13450         omit_lock_prefix = 1;
13451       else if (strcasecmp (arg, "no") == 0)
13452         omit_lock_prefix = 0;
13453       else
13454         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13455       break;
13456
13457     case OPTION_MFENCE_AS_LOCK_ADD:
13458       if (strcasecmp (arg, "yes") == 0)
13459         avoid_fence = 1;
13460       else if (strcasecmp (arg, "no") == 0)
13461         avoid_fence = 0;
13462       else
13463         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13464       break;
13465
13466     case OPTION_MLFENCE_AFTER_LOAD:
13467       if (strcasecmp (arg, "yes") == 0)
13468         lfence_after_load = 1;
13469       else if (strcasecmp (arg, "no") == 0)
13470         lfence_after_load = 0;
13471       else
13472         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13473       break;
13474
13475     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13476       if (strcasecmp (arg, "all") == 0)
13477         {
13478           lfence_before_indirect_branch = lfence_branch_all;
13479           if (lfence_before_ret == lfence_before_ret_none)
13480             lfence_before_ret = lfence_before_ret_shl;
13481         }
13482       else if (strcasecmp (arg, "memory") == 0)
13483         lfence_before_indirect_branch = lfence_branch_memory;
13484       else if (strcasecmp (arg, "register") == 0)
13485         lfence_before_indirect_branch = lfence_branch_register;
13486       else if (strcasecmp (arg, "none") == 0)
13487         lfence_before_indirect_branch = lfence_branch_none;
13488       else
13489         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13490                   arg);
13491       break;
13492
13493     case OPTION_MLFENCE_BEFORE_RET:
13494       if (strcasecmp (arg, "or") == 0)
13495         lfence_before_ret = lfence_before_ret_or;
13496       else if (strcasecmp (arg, "not") == 0)
13497         lfence_before_ret = lfence_before_ret_not;
13498       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13499         lfence_before_ret = lfence_before_ret_shl;
13500       else if (strcasecmp (arg, "none") == 0)
13501         lfence_before_ret = lfence_before_ret_none;
13502       else
13503         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13504                   arg);
13505       break;
13506
13507     case OPTION_MRELAX_RELOCATIONS:
13508       if (strcasecmp (arg, "yes") == 0)
13509         generate_relax_relocations = 1;
13510       else if (strcasecmp (arg, "no") == 0)
13511         generate_relax_relocations = 0;
13512       else
13513         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13514       break;
13515
13516     case OPTION_MALIGN_BRANCH_BOUNDARY:
13517       {
13518         char *end;
13519         long int align = strtoul (arg, &end, 0);
13520         if (*end == '\0')
13521           {
13522             if (align == 0)
13523               {
13524                 align_branch_power = 0;
13525                 break;
13526               }
13527             else if (align >= 16)
13528               {
13529                 int align_power;
13530                 for (align_power = 0;
13531                      (align & 1) == 0;
13532                      align >>= 1, align_power++)
13533                   continue;
13534                 /* Limit alignment power to 31.  */
13535                 if (align == 1 && align_power < 32)
13536                   {
13537                     align_branch_power = align_power;
13538                     break;
13539                   }
13540               }
13541           }
13542         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13543       }
13544       break;
13545
13546     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13547       {
13548         char *end;
13549         int align = strtoul (arg, &end, 0);
13550         /* Some processors only support 5 prefixes.  */
13551         if (*end == '\0' && align >= 0 && align < 6)
13552           {
13553             align_branch_prefix_size = align;
13554             break;
13555           }
13556         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13557                   arg);
13558       }
13559       break;
13560
13561     case OPTION_MALIGN_BRANCH:
13562       align_branch = 0;
13563       saved = xstrdup (arg);
13564       type = saved;
13565       do
13566         {
13567           next = strchr (type, '+');
13568           if (next)
13569             *next++ = '\0';
13570           if (strcasecmp (type, "jcc") == 0)
13571             align_branch |= align_branch_jcc_bit;
13572           else if (strcasecmp (type, "fused") == 0)
13573             align_branch |= align_branch_fused_bit;
13574           else if (strcasecmp (type, "jmp") == 0)
13575             align_branch |= align_branch_jmp_bit;
13576           else if (strcasecmp (type, "call") == 0)
13577             align_branch |= align_branch_call_bit;
13578           else if (strcasecmp (type, "ret") == 0)
13579             align_branch |= align_branch_ret_bit;
13580           else if (strcasecmp (type, "indirect") == 0)
13581             align_branch |= align_branch_indirect_bit;
13582           else
13583             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13584           type = next;
13585         }
13586       while (next != NULL);
13587       free (saved);
13588       break;
13589
13590     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13591       align_branch_power = 5;
13592       align_branch_prefix_size = 5;
13593       align_branch = (align_branch_jcc_bit
13594                       | align_branch_fused_bit
13595                       | align_branch_jmp_bit);
13596       break;
13597
13598     case OPTION_MAMD64:
13599       isa64 = amd64;
13600       break;
13601
13602     case OPTION_MINTEL64:
13603       isa64 = intel64;
13604       break;
13605
13606     case 'O':
13607       if (arg == NULL)
13608         {
13609           optimize = 1;
13610           /* Turn off -Os.  */
13611           optimize_for_space = 0;
13612         }
13613       else if (*arg == 's')
13614         {
13615           optimize_for_space = 1;
13616           /* Turn on all encoding optimizations.  */
13617           optimize = INT_MAX;
13618         }
13619       else
13620         {
13621           optimize = atoi (arg);
13622           /* Turn off -Os.  */
13623           optimize_for_space = 0;
13624         }
13625       break;
13626
13627     default:
13628       return 0;
13629     }
13630   return 1;
13631 }
13632
13633 #define MESSAGE_TEMPLATE \
13634 "                                                                                "
13635
13636 static char *
13637 output_message (FILE *stream, char *p, char *message, char *start,
13638                 int *left_p, const char *name, int len)
13639 {
13640   int size = sizeof (MESSAGE_TEMPLATE);
13641   int left = *left_p;
13642
13643   /* Reserve 2 spaces for ", " or ",\0" */
13644   left -= len + 2;
13645
13646   /* Check if there is any room.  */
13647   if (left >= 0)
13648     {
13649       if (p != start)
13650         {
13651           *p++ = ',';
13652           *p++ = ' ';
13653         }
13654       p = mempcpy (p, name, len);
13655     }
13656   else
13657     {
13658       /* Output the current message now and start a new one.  */
13659       *p++ = ',';
13660       *p = '\0';
13661       fprintf (stream, "%s\n", message);
13662       p = start;
13663       left = size - (start - message) - len - 2;
13664
13665       gas_assert (left >= 0);
13666
13667       p = mempcpy (p, name, len);
13668     }
13669
13670   *left_p = left;
13671   return p;
13672 }
13673
13674 static void
13675 show_arch (FILE *stream, int ext, int check)
13676 {
13677   static char message[] = MESSAGE_TEMPLATE;
13678   char *start = message + 27;
13679   char *p;
13680   int size = sizeof (MESSAGE_TEMPLATE);
13681   int left;
13682   const char *name;
13683   int len;
13684   unsigned int j;
13685
13686   p = start;
13687   left = size - (start - message);
13688   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13689     {
13690       /* Should it be skipped?  */
13691       if (cpu_arch [j].skip)
13692         continue;
13693
13694       name = cpu_arch [j].name;
13695       len = cpu_arch [j].len;
13696       if (*name == '.')
13697         {
13698           /* It is an extension.  Skip if we aren't asked to show it.  */
13699           if (ext)
13700             {
13701               name++;
13702               len--;
13703             }
13704           else
13705             continue;
13706         }
13707       else if (ext)
13708         {
13709           /* It is an processor.  Skip if we show only extension.  */
13710           continue;
13711         }
13712       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
13713         {
13714           /* It is an impossible processor - skip.  */
13715           continue;
13716         }
13717
13718       p = output_message (stream, p, message, start, &left, name, len);
13719     }
13720
13721   /* Display disabled extensions.  */
13722   if (ext)
13723     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
13724       {
13725         name = cpu_noarch [j].name;
13726         len = cpu_noarch [j].len;
13727         p = output_message (stream, p, message, start, &left, name,
13728                             len);
13729       }
13730
13731   *p = '\0';
13732   fprintf (stream, "%s\n", message);
13733 }
13734
/* Print the help text for the x86 specific command line options to
   STREAM.  Each entry is a separate translatable string.  Entries that
   depend on the object format or target environment are guarded by the
   matching preprocessor tests, and the CPU and extension name lists are
   produced by show_arch.  */

void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      Do not optimize code alignment\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
                      || defined (TE_PE) || defined (TE_PEP))
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  /* Processor names only, with the cpui386 check enabled.  */
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of:\n"));
  /* Extension names, followed by the cpu_noarch names.  */
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  /* Processor names again, this time without the cpui386 check.  */
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                           for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                           for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                           for SAE-only ignored instructions\n"));
  /* The option name and its default are printed by separate calls so
     that the default shown follows SYSV386_COMPAT.  */
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  /* `%%' because the text is used as a printf format string.  */
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  /* The default shown follows DEFAULT_X86_USED_NOTE.  */
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  /* The default shown follows DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mlfence-after-load=[no|yes] (default: no)\n\
                          generate lfence after load\n"));
  fprintf (stream, _("\
  -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
                          generate lfence before indirect near branch\n"));
  fprintf (stream, _("\
  -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
                          generate lfence before ret\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}
13877
13878 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13879      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13880      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13881
13882 /* Pick the target format to use.  */
13883
13884 const char *
13885 i386_target_format (void)
13886 {
13887   if (startswith (default_arch, "x86_64"))
13888     {
13889       update_code_flag (CODE_64BIT, 1);
13890       if (default_arch[6] == '\0')
13891         x86_elf_abi = X86_64_ABI;
13892       else
13893         x86_elf_abi = X86_64_X32_ABI;
13894     }
13895   else if (!strcmp (default_arch, "i386"))
13896     update_code_flag (CODE_32BIT, 1);
13897   else if (!strcmp (default_arch, "iamcu"))
13898     {
13899       update_code_flag (CODE_32BIT, 1);
13900       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
13901         {
13902           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
13903           cpu_arch_name = "iamcu";
13904           cpu_sub_arch_name = NULL;
13905           cpu_arch_flags = iamcu_flags;
13906           cpu_arch_isa = PROCESSOR_IAMCU;
13907           cpu_arch_isa_flags = iamcu_flags;
13908           if (!cpu_arch_tune_set)
13909             {
13910               cpu_arch_tune = cpu_arch_isa;
13911               cpu_arch_tune_flags = cpu_arch_isa_flags;
13912             }
13913         }
13914       else if (cpu_arch_isa != PROCESSOR_IAMCU)
13915         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
13916                   cpu_arch_name);
13917     }
13918   else
13919     as_fatal (_("unknown architecture"));
13920
13921   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
13922     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13923   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
13924     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13925
13926   switch (OUTPUT_FLAVOR)
13927     {
13928 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
13929     case bfd_target_aout_flavour:
13930       return AOUT_TARGET_FORMAT;
13931 #endif
13932 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
13933 # if defined (TE_PE) || defined (TE_PEP)
13934     case bfd_target_coff_flavour:
13935       if (flag_code == CODE_64BIT)
13936         {
13937           object_64bit = 1;
13938           return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
13939         }
13940       return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
13941 # elif defined (TE_GO32)
13942     case bfd_target_coff_flavour:
13943       return "coff-go32";
13944 # else
13945     case bfd_target_coff_flavour:
13946       return "coff-i386";
13947 # endif
13948 #endif
13949 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13950     case bfd_target_elf_flavour:
13951       {
13952         const char *format;
13953
13954         switch (x86_elf_abi)
13955           {
13956           default:
13957             format = ELF_TARGET_FORMAT;
13958 #ifndef TE_SOLARIS
13959             tls_get_addr = "___tls_get_addr";
13960 #endif
13961             break;
13962           case X86_64_ABI:
13963             use_rela_relocations = 1;
13964             object_64bit = 1;
13965 #ifndef TE_SOLARIS
13966             tls_get_addr = "__tls_get_addr";
13967 #endif
13968             format = ELF_TARGET_FORMAT64;
13969             break;
13970           case X86_64_X32_ABI:
13971             use_rela_relocations = 1;
13972             object_64bit = 1;
13973 #ifndef TE_SOLARIS
13974             tls_get_addr = "__tls_get_addr";
13975 #endif
13976             disallow_64bit_reloc = 1;
13977             format = ELF_TARGET_FORMAT32;
13978             break;
13979           }
13980         if (cpu_arch_isa == PROCESSOR_L1OM)
13981           {
13982             if (x86_elf_abi != X86_64_ABI)
13983               as_fatal (_("Intel L1OM is 64bit only"));
13984             return ELF_TARGET_L1OM_FORMAT;
13985           }
13986         else if (cpu_arch_isa == PROCESSOR_K1OM)
13987           {
13988             if (x86_elf_abi != X86_64_ABI)
13989               as_fatal (_("Intel K1OM is 64bit only"));
13990             return ELF_TARGET_K1OM_FORMAT;
13991           }
13992         else if (cpu_arch_isa == PROCESSOR_IAMCU)
13993           {
13994             if (x86_elf_abi != I386_ABI)
13995               as_fatal (_("Intel MCU is 32bit only"));
13996             return ELF_TARGET_IAMCU_FORMAT;
13997           }
13998         else
13999           return format;
14000       }
14001 #endif
14002 #if defined (OBJ_MACH_O)
14003     case bfd_target_mach_o_flavour:
14004       if (flag_code == CODE_64BIT)
14005         {
14006           use_rela_relocations = 1;
14007           object_64bit = 1;
14008           return "mach-o-x86-64";
14009         }
14010       else
14011         return "mach-o-i386";
14012 #endif
14013     default:
14014       abort ();
14015       return NULL;
14016     }
14017 }
14018
14019 #endif /* OBJ_MAYBE_ more than one  */
14020 \f
14021 symbolS *
14022 md_undefined_symbol (char *name)
14023 {
14024   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14025       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14026       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14027       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14028     {
14029       if (!GOT_symbol)
14030         {
14031           if (symbol_find (name))
14032             as_bad (_("GOT already in symbol table"));
14033           GOT_symbol = symbol_new (name, undefined_section,
14034                                    &zero_address_frag, 0);
14035         };
14036       return GOT_symbol;
14037     }
14038   return 0;
14039 }
14040
14041 /* Round up a section size to the appropriate boundary.  */
14042
14043 valueT
14044 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14045 {
14046 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14047   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14048     {
14049       /* For a.out, force the section size to be aligned.  If we don't do
14050          this, BFD will align it for us, but it will not write out the
14051          final bytes of the section.  This may be a bug in BFD, but it is
14052          easier to fix it here since that is how the other a.out targets
14053          work.  */
14054       int align;
14055
14056       align = bfd_section_alignment (segment);
14057       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14058     }
14059 #endif
14060
14061   return size;
14062 }
14063
14064 /* On the i386, PC-relative offsets are relative to the start of the
14065    next instruction.  That is, the address of the offset, plus its
14066    size, since the offset is always the last part of the insn.  */
14067
14068 long
14069 md_pcrel_from (fixS *fixP)
14070 {
14071   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14072 }
14073
14074 #ifndef I386COFF
14075
14076 static void
14077 s_bss (int ignore ATTRIBUTE_UNUSED)
14078 {
14079   int temp;
14080
14081 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14082   if (IS_ELF)
14083     obj_elf_section_change_hook ();
14084 #endif
14085   temp = get_absolute_expression ();
14086   subseg_set (bss_section, (subsegT) temp);
14087   demand_empty_rest_of_line ();
14088 }
14089
14090 #endif
14091
14092 /* Remember constant directive.  */
14093
14094 void
14095 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14096 {
14097   if (last_insn.kind != last_insn_directive
14098       && (bfd_section_flags (now_seg) & SEC_CODE))
14099     {
14100       last_insn.seg = now_seg;
14101       last_insn.kind = last_insn_directive;
14102       last_insn.name = "constant directive";
14103       last_insn.file = as_where (&last_insn.line);
14104       if (lfence_before_ret != lfence_before_ret_none)
14105         {
14106           if (lfence_before_indirect_branch != lfence_branch_none)
14107             as_warn (_("constant directive skips -mlfence-before-ret "
14108                        "and -mlfence-before-indirect-branch"));
14109           else
14110             as_warn (_("constant directive skips -mlfence-before-ret"));
14111         }
14112       else if (lfence_before_indirect_branch != lfence_branch_none)
14113         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14114     }
14115 }
14116
14117 int
14118 i386_validate_fix (fixS *fixp)
14119 {
14120 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14121   if (fixp->fx_r_type == BFD_RELOC_SIZE32
14122       || fixp->fx_r_type == BFD_RELOC_SIZE64)
14123     return IS_ELF && fixp->fx_addsy
14124            && (!S_IS_DEFINED (fixp->fx_addsy)
14125                || S_IS_EXTERNAL (fixp->fx_addsy));
14126 #endif
14127
14128   if (fixp->fx_subsy)
14129     {
14130       if (fixp->fx_subsy == GOT_symbol)
14131         {
14132           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14133             {
14134               if (!object_64bit)
14135                 abort ();
14136 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14137               if (fixp->fx_tcbit2)
14138                 fixp->fx_r_type = (fixp->fx_tcbit
14139                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
14140                                    : BFD_RELOC_X86_64_GOTPCRELX);
14141               else
14142 #endif
14143                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14144             }
14145           else
14146             {
14147               if (!object_64bit)
14148                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14149               else
14150                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14151             }
14152           fixp->fx_subsy = 0;
14153         }
14154     }
14155 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14156   else
14157     {
14158       /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
14159          to section.  Since PLT32 relocation must be against symbols,
14160          turn such PLT32 relocation into PC32 relocation.  */
14161       if (fixp->fx_addsy
14162           && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14163               || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14164           && symbol_section_p (fixp->fx_addsy))
14165         fixp->fx_r_type = BFD_RELOC_32_PCREL;
14166       if (!object_64bit)
14167         {
14168           if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14169               && fixp->fx_tcbit2)
14170             fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14171         }
14172     }
14173 #endif
14174
14175   return 1;
14176 }
14177
14178 arelent *
14179 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14180 {
14181   arelent *rel;
14182   bfd_reloc_code_real_type code;
14183
14184   switch (fixp->fx_r_type)
14185     {
14186 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14187       symbolS *sym;
14188
14189     case BFD_RELOC_SIZE32:
14190     case BFD_RELOC_SIZE64:
14191       if (fixp->fx_addsy
14192           && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14193           && (!fixp->fx_subsy
14194               || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14195         sym = fixp->fx_addsy;
14196       else if (fixp->fx_subsy
14197                && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14198                && (!fixp->fx_addsy
14199                    || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14200         sym = fixp->fx_subsy;
14201       else
14202         sym = NULL;
14203       if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14204         {
14205           /* Resolve size relocation against local symbol to size of
14206              the symbol plus addend.  */
14207           valueT value = S_GET_SIZE (sym);
14208
14209           if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14210             value = bfd_section_size (S_GET_SEGMENT (sym));
14211           if (sym == fixp->fx_subsy)
14212             {
14213               value = -value;
14214               if (fixp->fx_addsy)
14215                 value += S_GET_VALUE (fixp->fx_addsy);
14216             }
14217           else if (fixp->fx_subsy)
14218             value -= S_GET_VALUE (fixp->fx_subsy);
14219           value += fixp->fx_offset;
14220           if (fixp->fx_r_type == BFD_RELOC_SIZE32
14221               && object_64bit
14222               && !fits_in_unsigned_long (value))
14223             as_bad_where (fixp->fx_file, fixp->fx_line,
14224                           _("symbol size computation overflow"));
14225           fixp->fx_addsy = NULL;
14226           fixp->fx_subsy = NULL;
14227           md_apply_fix (fixp, (valueT *) &value, NULL);
14228           return NULL;
14229         }
14230       if (!fixp->fx_addsy || fixp->fx_subsy)
14231         {
14232           as_bad_where (fixp->fx_file, fixp->fx_line,
14233                         "unsupported expression involving @size");
14234           return NULL;
14235         }
14236 #endif
14237       /* Fall through.  */
14238
14239     case BFD_RELOC_X86_64_PLT32:
14240     case BFD_RELOC_X86_64_GOT32:
14241     case BFD_RELOC_X86_64_GOTPCREL:
14242     case BFD_RELOC_X86_64_GOTPCRELX:
14243     case BFD_RELOC_X86_64_REX_GOTPCRELX:
14244     case BFD_RELOC_386_PLT32:
14245     case BFD_RELOC_386_GOT32:
14246     case BFD_RELOC_386_GOT32X:
14247     case BFD_RELOC_386_GOTOFF:
14248     case BFD_RELOC_386_GOTPC:
14249     case BFD_RELOC_386_TLS_GD:
14250     case BFD_RELOC_386_TLS_LDM:
14251     case BFD_RELOC_386_TLS_LDO_32:
14252     case BFD_RELOC_386_TLS_IE_32:
14253     case BFD_RELOC_386_TLS_IE:
14254     case BFD_RELOC_386_TLS_GOTIE:
14255     case BFD_RELOC_386_TLS_LE_32:
14256     case BFD_RELOC_386_TLS_LE:
14257     case BFD_RELOC_386_TLS_GOTDESC:
14258     case BFD_RELOC_386_TLS_DESC_CALL:
14259     case BFD_RELOC_X86_64_TLSGD:
14260     case BFD_RELOC_X86_64_TLSLD:
14261     case BFD_RELOC_X86_64_DTPOFF32:
14262     case BFD_RELOC_X86_64_DTPOFF64:
14263     case BFD_RELOC_X86_64_GOTTPOFF:
14264     case BFD_RELOC_X86_64_TPOFF32:
14265     case BFD_RELOC_X86_64_TPOFF64:
14266     case BFD_RELOC_X86_64_GOTOFF64:
14267     case BFD_RELOC_X86_64_GOTPC32:
14268     case BFD_RELOC_X86_64_GOT64:
14269     case BFD_RELOC_X86_64_GOTPCREL64:
14270     case BFD_RELOC_X86_64_GOTPC64:
14271     case BFD_RELOC_X86_64_GOTPLT64:
14272     case BFD_RELOC_X86_64_PLTOFF64:
14273     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14274     case BFD_RELOC_X86_64_TLSDESC_CALL:
14275     case BFD_RELOC_RVA:
14276     case BFD_RELOC_VTABLE_ENTRY:
14277     case BFD_RELOC_VTABLE_INHERIT:
14278 #ifdef TE_PE
14279     case BFD_RELOC_32_SECREL:
14280 #endif
14281       code = fixp->fx_r_type;
14282       break;
14283     case BFD_RELOC_X86_64_32S:
14284       if (!fixp->fx_pcrel)
14285         {
14286           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
14287           code = fixp->fx_r_type;
14288           break;
14289         }
14290       /* Fall through.  */
14291     default:
14292       if (fixp->fx_pcrel)
14293         {
14294           switch (fixp->fx_size)
14295             {
14296             default:
14297               as_bad_where (fixp->fx_file, fixp->fx_line,
14298                             _("can not do %d byte pc-relative relocation"),
14299                             fixp->fx_size);
14300               code = BFD_RELOC_32_PCREL;
14301               break;
14302             case 1: code = BFD_RELOC_8_PCREL;  break;
14303             case 2: code = BFD_RELOC_16_PCREL; break;
14304             case 4: code = BFD_RELOC_32_PCREL; break;
14305 #ifdef BFD64
14306             case 8: code = BFD_RELOC_64_PCREL; break;
14307 #endif
14308             }
14309         }
14310       else
14311         {
14312           switch (fixp->fx_size)
14313             {
14314             default:
14315               as_bad_where (fixp->fx_file, fixp->fx_line,
14316                             _("can not do %d byte relocation"),
14317                             fixp->fx_size);
14318               code = BFD_RELOC_32;
14319               break;
14320             case 1: code = BFD_RELOC_8;  break;
14321             case 2: code = BFD_RELOC_16; break;
14322             case 4: code = BFD_RELOC_32; break;
14323 #ifdef BFD64
14324             case 8: code = BFD_RELOC_64; break;
14325 #endif
14326             }
14327         }
14328       break;
14329     }
14330
14331   if ((code == BFD_RELOC_32
14332        || code == BFD_RELOC_32_PCREL
14333        || code == BFD_RELOC_X86_64_32S)
14334       && GOT_symbol
14335       && fixp->fx_addsy == GOT_symbol)
14336     {
14337       if (!object_64bit)
14338         code = BFD_RELOC_386_GOTPC;
14339       else
14340         code = BFD_RELOC_X86_64_GOTPC32;
14341     }
14342   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14343       && GOT_symbol
14344       && fixp->fx_addsy == GOT_symbol)
14345     {
14346       code = BFD_RELOC_X86_64_GOTPC64;
14347     }
14348
14349   rel = XNEW (arelent);
14350   rel->sym_ptr_ptr = XNEW (asymbol *);
14351   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14352
14353   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14354
14355   if (!use_rela_relocations)
14356     {
14357       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14358          vtable entry to be used in the relocation's section offset.  */
14359       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14360         rel->address = fixp->fx_offset;
14361 #if defined (OBJ_COFF) && defined (TE_PE)
14362       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14363         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14364       else
14365 #endif
14366       rel->addend = 0;
14367     }
14368   /* Use the rela in 64bit mode.  */
14369   else
14370     {
14371       if (disallow_64bit_reloc)
14372         switch (code)
14373           {
14374           case BFD_RELOC_X86_64_DTPOFF64:
14375           case BFD_RELOC_X86_64_TPOFF64:
14376           case BFD_RELOC_64_PCREL:
14377           case BFD_RELOC_X86_64_GOTOFF64:
14378           case BFD_RELOC_X86_64_GOT64:
14379           case BFD_RELOC_X86_64_GOTPCREL64:
14380           case BFD_RELOC_X86_64_GOTPC64:
14381           case BFD_RELOC_X86_64_GOTPLT64:
14382           case BFD_RELOC_X86_64_PLTOFF64:
14383             as_bad_where (fixp->fx_file, fixp->fx_line,
14384                           _("cannot represent relocation type %s in x32 mode"),
14385                           bfd_get_reloc_code_name (code));
14386             break;
14387           default:
14388             break;
14389           }
14390
14391       if (!fixp->fx_pcrel)
14392         rel->addend = fixp->fx_offset;
14393       else
14394         switch (code)
14395           {
14396           case BFD_RELOC_X86_64_PLT32:
14397           case BFD_RELOC_X86_64_GOT32:
14398           case BFD_RELOC_X86_64_GOTPCREL:
14399           case BFD_RELOC_X86_64_GOTPCRELX:
14400           case BFD_RELOC_X86_64_REX_GOTPCRELX:
14401           case BFD_RELOC_X86_64_TLSGD:
14402           case BFD_RELOC_X86_64_TLSLD:
14403           case BFD_RELOC_X86_64_GOTTPOFF:
14404           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14405           case BFD_RELOC_X86_64_TLSDESC_CALL:
14406             rel->addend = fixp->fx_offset - fixp->fx_size;
14407             break;
14408           default:
14409             rel->addend = (section->vma
14410                            - fixp->fx_size
14411                            + fixp->fx_addnumber
14412                            + md_pcrel_from (fixp));
14413             break;
14414           }
14415     }
14416
14417   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14418   if (rel->howto == NULL)
14419     {
14420       as_bad_where (fixp->fx_file, fixp->fx_line,
14421                     _("cannot represent relocation type %s"),
14422                     bfd_get_reloc_code_name (code));
14423       /* Set howto to a garbage value so that we can keep going.  */
14424       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14425       gas_assert (rel->howto != NULL);
14426     }
14427
14428   return rel;
14429 }
14430
14431 #include "tc-i386-intel.c"
14432
14433 void
14434 tc_x86_parse_to_dw2regnum (expressionS *exp)
14435 {
14436   int saved_naked_reg;
14437   char saved_register_dot;
14438
14439   saved_naked_reg = allow_naked_reg;
14440   allow_naked_reg = 1;
14441   saved_register_dot = register_chars['.'];
14442   register_chars['.'] = '.';
14443   allow_pseudo_reg = 1;
14444   expression_and_evaluate (exp);
14445   allow_pseudo_reg = 0;
14446   register_chars['.'] = saved_register_dot;
14447   allow_naked_reg = saved_naked_reg;
14448
14449   if (exp->X_op == O_register && exp->X_add_number >= 0)
14450     {
14451       if ((addressT) exp->X_add_number < i386_regtab_size)
14452         {
14453           exp->X_op = O_constant;
14454           exp->X_add_number = i386_regtab[exp->X_add_number]
14455                               .dw2_regnum[flag_code >> 1];
14456         }
14457       else
14458         exp->X_op = O_illegal;
14459     }
14460 }
14461
14462 void
14463 tc_x86_frame_initial_instructions (void)
14464 {
14465   static unsigned int sp_regno[2];
14466
14467   if (!sp_regno[flag_code >> 1])
14468     {
14469       char *saved_input = input_line_pointer;
14470       char sp[][4] = {"esp", "rsp"};
14471       expressionS exp;
14472
14473       input_line_pointer = sp[flag_code >> 1];
14474       tc_x86_parse_to_dw2regnum (&exp);
14475       gas_assert (exp.X_op == O_constant);
14476       sp_regno[flag_code >> 1] = exp.X_add_number;
14477       input_line_pointer = saved_input;
14478     }
14479
14480   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14481   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14482 }
14483
14484 int
14485 x86_dwarf2_addr_size (void)
14486 {
14487 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14488   if (x86_elf_abi == X86_64_X32_ABI)
14489     return 4;
14490 #endif
14491   return bfd_arch_bits_per_address (stdoutput) / 8;
14492 }
14493
14494 int
14495 i386_elf_section_type (const char *str, size_t len)
14496 {
14497   if (flag_code == CODE_64BIT
14498       && len == sizeof ("unwind") - 1
14499       && startswith (str, "unwind"))
14500     return SHT_X86_64_UNWIND;
14501
14502   return -1;
14503 }
14504
14505 #ifdef TE_SOLARIS
14506 void
14507 i386_solaris_fix_up_eh_frame (segT sec)
14508 {
14509   if (flag_code == CODE_64BIT)
14510     elf_section_type (sec) = SHT_X86_64_UNWIND;
14511 }
14512 #endif
14513
14514 #ifdef TE_PE
14515 void
14516 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14517 {
14518   expressionS exp;
14519
14520   exp.X_op = O_secrel;
14521   exp.X_add_symbol = symbol;
14522   exp.X_add_number = 0;
14523   emit_expr (&exp, size);
14524 }
14525 #endif
14526
14527 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14528 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14529
14530 bfd_vma
14531 x86_64_section_letter (int letter, const char **ptr_msg)
14532 {
14533   if (flag_code == CODE_64BIT)
14534     {
14535       if (letter == 'l')
14536         return SHF_X86_64_LARGE;
14537
14538       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14539     }
14540   else
14541     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14542   return -1;
14543 }
14544
14545 bfd_vma
14546 x86_64_section_word (char *str, size_t len)
14547 {
14548   if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14549     return SHF_X86_64_LARGE;
14550
14551   return -1;
14552 }
14553
14554 static void
14555 handle_large_common (int small ATTRIBUTE_UNUSED)
14556 {
14557   if (flag_code != CODE_64BIT)
14558     {
14559       s_comm_internal (0, elf_common_parse);
14560       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14561     }
14562   else
14563     {
14564       static segT lbss_section;
14565       asection *saved_com_section_ptr = elf_com_section_ptr;
14566       asection *saved_bss_section = bss_section;
14567
14568       if (lbss_section == NULL)
14569         {
14570           flagword applicable;
14571           segT seg = now_seg;
14572           subsegT subseg = now_subseg;
14573
14574           /* The .lbss section is for local .largecomm symbols.  */
14575           lbss_section = subseg_new (".lbss", 0);
14576           applicable = bfd_applicable_section_flags (stdoutput);
14577           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14578           seg_info (lbss_section)->bss = 1;
14579
14580           subseg_set (seg, subseg);
14581         }
14582
14583       elf_com_section_ptr = &_bfd_elf_large_com_section;
14584       bss_section = lbss_section;
14585
14586       s_comm_internal (0, elf_common_parse);
14587
14588       elf_com_section_ptr = saved_com_section_ptr;
14589       bss_section = saved_bss_section;
14590     }
14591 }
14592 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */