gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2020 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus ([email protected]).
  23    x86_64 support by Jan Hubicka ([email protected])
  24    VIA PadLock support by Michal Ludvig ([email protected])
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "elf/x86-64.h"
  34 #include "opcodes/i386-init.h"
  35
     /* Pull in INT_MAX from <limits.h> when available.  Otherwise try
        <sys/param.h> and, if INT_MAX is still undefined, define it as the
        all-ones unsigned value shifted right by one bit, converted to int.  */
  36 #ifdef HAVE_LIMITS_H
  37 #include <limits.h>
  38 #else
  39 #ifdef HAVE_SYS_PARAM_H
  40 #include <sys/param.h>
  41 #endif
  42 #ifndef INT_MAX
  43 #define INT_MAX (int) (((unsigned) (-1)) >> 1)
  44 #endif
  45 #endif
  46
     /* INFER_ADDR_PREFIX defaults to 1 unless it was defined before this
        point.  */
  47 #ifndef INFER_ADDR_PREFIX
  48 #define INFER_ADDR_PREFIX 1
  49 #endif
  50
     /* Architecture name used to initialize default_arch when no other
        definition of DEFAULT_ARCH was supplied.  */
  51 #ifndef DEFAULT_ARCH
  52 #define DEFAULT_ARCH "i386"
  53 #endif
  54
     /* INLINE expands to __inline__ when __GNUC__ is 2 or greater and to
        nothing otherwise, unless INLINE was already defined.  */
  55 #ifndef INLINE
  56 #if __GNUC__ >= 2
  57 #define INLINE __inline__
  58 #else
  59 #define INLINE
  60 #endif
  61 #endif
  62
  63 /* Prefixes will be emitted in the order defined below.
  64    WAIT_PREFIX must be the first prefix since FWAIT really is an
  65    instruction, and so must come before any prefixes.
  66    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  67    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  68 #define WAIT_PREFIX     0
  69 #define SEG_PREFIX      1
  70 #define ADDR_PREFIX     2
  71 #define DATA_PREFIX     3
  72 #define REP_PREFIX      4
     /* HLE_PREFIX and BND_PREFIX are aliases for the REP_PREFIX slot.  */
  73 #define HLE_PREFIX      REP_PREFIX
  74 #define BND_PREFIX      REP_PREFIX
  75 #define LOCK_PREFIX     5
  76 #define REX_PREFIX      6       /* must come last.  */
  77 #define MAX_PREFIXES    7       /* max prefixes per opcode */
  78
  79 /* We define the syntax here (modulo base,index,scale syntax).  */
  80 #define REGISTER_PREFIX '%'
  81 #define IMMEDIATE_PREFIX '$'
  82 #define ABSOLUTE_PREFIX '*'
  83
  84 /* These are the instruction mnemonic suffixes in AT&T syntax or
  85    memory operand size in Intel syntax.  */
  86 #define WORD_MNEM_SUFFIX  'w'
  87 #define BYTE_MNEM_SUFFIX  'b'
  88 #define SHORT_MNEM_SUFFIX 's'
  89 #define LONG_MNEM_SUFFIX  'l'
  90 #define QWORD_MNEM_SUFFIX  'q'
  91 /* Intel Syntax.  Use a non-letter character since it never appears
  92    in instructions.  */
  93 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  94
  95 #define END_OF_INSN '\0'
  96
  97 /* This matches the C -> StaticRounding alias in the opcode table.  */
  98 #define commutative staticrounding
  99
 100 /*
 101   'templates' is for grouping together 'template' structures for opcodes
 102   of the same name.  This is only used for storing the insns in the grand
 103   ole hash table of insns.
 104   The templates themselves start at START and range up to (but not including)
 105   END, i.e. the group is the half-open range [START, END).
 106   */
 107 typedef struct
 108 {
 109   const insn_template *start;   /* first template of the group */
 110   const insn_template *end;     /* one past the last template of the group */
 111 }
 112 templates;
 113
 114 /* 386 operand encoding bytes:  see 386 book for details of this.
        Each of the three fields is held in its own unsigned int rather
        than packed into a single byte.  */
 115 typedef struct
 116 {
 117   unsigned int regmem;  /* codes register or memory operand */
 118   unsigned int reg;     /* codes register operand (or extended opcode) */
 119   unsigned int mode;    /* how to interpret regmem & reg */
 120 }
 121 modrm_byte;
 122
 123 /* x86-64 extension prefix, held in a plain int.  */
 124 typedef int rex_byte;
 125
 126 /* 386 opcode byte to code indirect addressing.  As with modrm_byte,
        each field is held in its own unsigned int.  */
 127 typedef struct
 128 {
 129   unsigned base;        /* base field */
 130   unsigned index;       /* index field */
 131   unsigned scale;       /* scale field */
 132 }
 133 sib_byte;
 134
 135 /* x86 arch names, types and features.  One element of the cpu_arch[]
        table; its initializers use STRING_COMMA_LEN for the leading
        members (presumably supplying both NAME and LEN -- confirm against
        the macro's definition).  */
 136 typedef struct
 137 {
 138   const char *name;             /* arch name */
 139   unsigned int len;             /* arch string length */
 140   enum processor_type type;     /* arch type */
 141   i386_cpu_flags flags;         /* cpu feature flags */
 142   unsigned int skip;            /* show_arch should skip this. */
 143 }
 144 arch_entry;
 145
 146 /* Used to turn off indicated flags.  Same leading members as
        arch_entry, but without the processor type and skip fields.  */
 147 typedef struct
 148 {
 149   const char *name;             /* arch name */
 150   unsigned int len;             /* arch string length */
 151   i386_cpu_flags flags;         /* cpu feature flags */
 152 }
 153 noarch_entry;
 154
     /* Prototypes for functions local to this file (all are static).  A few
        are only declared under particular target or object-format macros.  */
 155 static void update_code_flag (int, int);
 156 static void set_code_flag (int);
 157 static void set_16bit_gcc_code_flag (int);
 158 static void set_intel_syntax (int);
 159 static void set_intel_mnemonic (int);
 160 static void set_allow_index_reg (int);
 161 static void set_check (int);
 162 static void set_cpu_arch (int);
     /* Only declared when TE_PE is defined.  */
 163 #ifdef TE_PE
 164 static void pe_directive_secrel (int);
 165 #endif
 166 static void signed_cons (int);
 167 static char *output_invalid (int c);
 168 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 169                                     const char *);
 170 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 171                                        const char *);
 172 static int i386_att_operand (char *);
 173 static int i386_intel_operand (char *, int);
 174 static int i386_intel_simplify (expressionS *);
 175 static int i386_intel_parse_name (const char *, expressionS *);
 176 static const reg_entry *parse_register (char *, char **);
 177 static char *parse_insn (char *, char *);
 178 static char *parse_operands (char *, const char *);
 179 static void swap_operands (void);
 180 static void swap_2_operands (int, int);
 181 static enum flag_code i386_addressing_mode (void);
 182 static void optimize_imm (void);
 183 static void optimize_disp (void);
 184 static const insn_template *match_template (char);
 185 static int check_string (void);
 186 static int process_suffix (void);
 187 static int check_byte_reg (void);
 188 static int check_long_reg (void);
 189 static int check_qword_reg (void);
 190 static int check_word_reg (void);
 191 static int finalize_imm (void);
 192 static int process_operands (void);
 193 static const seg_entry *build_modrm_byte (void);
 194 static void output_insn (void);
 195 static void output_imm (fragS *, offsetT);
 196 static void output_disp (fragS *, offsetT);
     /* Not declared when I386COFF is defined.  */
 197 #ifndef I386COFF
 198 static void s_bss (int);
 199 #endif
     /* The remaining declarations exist only for ELF (or maybe-ELF)
        configurations.  */
 200 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 201 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 202
 203 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 204 static unsigned int x86_isa_1_used;
 205 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 206 static unsigned int x86_feature_2_used;
 207 /* Generate x86 used ISA and feature properties.  Initialized from
        DEFAULT_X86_USED_NOTE.  */
 208 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 209 #endif
 210
     /* Default architecture name, initialized from DEFAULT_ARCH.  */
 211 static const char *default_arch = DEFAULT_ARCH;
 212
 213 /* This struct describes rounding control and SAE in the instruction.
        The enumerators of rc_type are paired with their assembly spellings
        in RC_NamesTable: rne "rn-sae", rd "rd-sae", ru "ru-sae",
        rz "rz-sae" and saeonly "sae".  */
 214 struct RC_Operation
 215 {
 216   enum rc_type
 217     {
 218       rne = 0,
 219       rd,
 220       ru,
 221       rz,
 222       saeonly
 223     } type;
     /* Presumably the index of the operand this applies to, as for
        Mask_Operation.operand -- confirm against the users.  */
 224   int operand;
 225 };
 226
     /* File-scope RC_Operation instance.  */
 227 static struct RC_Operation rc_op;
 228
 229 /* The struct describes masking, applied to OPERAND in the instruction.
 230    MASK is a pointer to the corresponding mask register.  ZEROING tells
 231    whether merging or zeroing mask is used.  */
 232 struct Mask_Operation
 233 {
 234   const reg_entry *mask;
 235   unsigned int zeroing;
 236   /* The operand where this operation is associated.  */
 237   int operand;
 238 };
 239
     /* File-scope Mask_Operation instance.  */
 240 static struct Mask_Operation mask_op;
 241
 242 /* The struct describes broadcasting, applied to OPERAND.  FACTOR is
 243    broadcast factor.
        NOTE(review): no member is named FACTOR; the factor appears to be
        what TYPE holds -- confirm.  */
 244 struct Broadcast_Operation
 245 {
 246   /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}.  */
 247   int type;
 248
 249   /* Index of broadcasted operand.  */
 250   int operand;
 251
 252   /* Number of bytes to broadcast.  */
 253   int bytes;
 254 };
 255
     /* File-scope Broadcast_Operation instance.  */
 256 static struct Broadcast_Operation broadcast_op;
 257
 258 /* VEX prefix.  The same structure also has room for the 4-byte EVEX
        form.  */
 259 typedef struct
 260 {
 261   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 262   unsigned char bytes[4];
     /* Presumably the number of entries of BYTES in use -- confirm.  */
 263   unsigned int length;
 264   /* Destination or source register specifier.  */
 265   const reg_entry *register_specifier;
 266 } vex_prefix;
 267
 268 /* 'md_assemble ()' gathers together information and puts it into a
 269    i386_insn.  */
 270
     /* Per-operand payload stored in i386_insn.op[]: a displacement
        expression, an immediate expression, or a register.  */
 271 union i386_op
 272   {
 273     expressionS *disps;
 274     expressionS *imms;
 275     const reg_entry *regs;
 276   };
 277
     /* Error kinds; the type of the ERROR member of struct _i386_insn.  */
 278 enum i386_error
 279   {
 280     operand_size_mismatch,
 281     operand_type_mismatch,
 282     register_type_mismatch,
 283     number_of_operands_mismatch,
 284     invalid_instruction_suffix,
 285     bad_imm4,
 286     unsupported_with_intel_mnemonic,
 287     unsupported_syntax,
 288     unsupported,
 289     invalid_vsib_address,
 290     invalid_vector_register_set,
 291     unsupported_vector_index_register,
 292     unsupported_broadcast,
 293     broadcast_needed,
 294     unsupported_masking,
 295     mask_not_on_destination,
 296     no_default_mask,
 297     unsupported_rc_sae,
 298     rc_sae_operand_not_last_imm,
 299     invalid_register_operand,
 300   };
 301
     /* Everything gathered about one instruction.  The file keeps a single
        static instance, I, described as "the instruction we're
        assembling".  */
 302 struct _i386_insn
 303   {
 304     /* TM holds the template for the insn we're currently assembling.  */
 305     insn_template tm;
 306
 307     /* SUFFIX holds the instruction size suffix for byte, word, dword
 308        or qword, if given.  */
 309     char suffix;
 310
 311     /* OPERANDS gives the number of given operands.  */
 312     unsigned int operands;
 313
 314     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 315        of given register, displacement, memory operands and immediate
 316        operands.  */
 317     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 318
 319     /* TYPES [i] is the type (see above #defines) which tells us how to
 320        use OP[i] for the corresponding operand.  */
 321     i386_operand_type types[MAX_OPERANDS];
 322
 323     /* Displacement expression, immediate expression, or register for each
 324        operand.  */
 325     union i386_op op[MAX_OPERANDS];
 326
 327     /* Flags for operands.  The two macros below are presumably the bit
           values stored here -- confirm against the users.  */
 328     unsigned int flags[MAX_OPERANDS];
 329 #define Operand_PCrel 1
 330 #define Operand_Mem   2
 331
 332     /* Relocation type for operand.  */
 333     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 334
 335     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 336        the base index byte below.  */
 337     const reg_entry *base_reg;
 338     const reg_entry *index_reg;
 339     unsigned int log2_scale_factor;
 340
 341     /* SEG gives the seg_entries of this insn.  They are zero unless
 342        explicit segment overrides are given.  */
 343     const seg_entry *seg[2];
 344
 345     /* Copied first memory operand string, for re-checking.  */
 346     char *memop1_string;
 347
 348     /* PREFIX holds all the given prefix opcodes (usually null).
 349        PREFIXES is the number of prefix opcodes.  PREFIX has
           MAX_PREFIXES entries.  */
 350     unsigned int prefixes;
 351     unsigned char prefix[MAX_PREFIXES];
 352
 353     /* Register is in low 3 bits of opcode.  */
 354     bfd_boolean short_form;
 355
 356     /* The operand to a branch insn indicates an absolute branch.  */
 357     bfd_boolean jumpabsolute;
 358
 359     /* Has MMX register operands.  */
 360     bfd_boolean has_regmmx;
 361
 362     /* Has XMM register operands.  */
 363     bfd_boolean has_regxmm;
 364
 365     /* Has YMM register operands.  */
 366     bfd_boolean has_regymm;
 367
 368     /* Has ZMM register operands.  */
 369     bfd_boolean has_regzmm;
 370
 371     /* Has GOTPC or TLS relocation.  */
 372     bfd_boolean has_gotpc_tls_reloc;
 373
 374     /* RM and SIB are the modrm byte and the sib byte where the
 375        addressing modes of this insn are encoded.  REX, VREX and VEX
           hold the corresponding prefix state.  */
 376     modrm_byte rm;
 377     rex_byte rex;
 378     rex_byte vrex;
 379     sib_byte sib;
 380     vex_prefix vex;
 381
 382     /* Masking attributes.  */
 383     struct Mask_Operation *mask;
 384
 385     /* Rounding control and SAE attributes.  */
 386     struct RC_Operation *rounding;
 387
 388     /* Broadcasting attributes.  */
 389     struct Broadcast_Operation *broadcast;
 390
 391     /* Compressed disp8*N attribute.  */
 392     unsigned int memshift;
 393
 394     /* Prefer load or store in encoding.  */
 395     enum
 396       {
 397         dir_encoding_default = 0,
 398         dir_encoding_load,
 399         dir_encoding_store,
 400         dir_encoding_swap
 401       } dir_encoding;
 402
 403     /* Prefer 8bit or 32bit displacement in encoding.  */
 404     enum
 405       {
 406         disp_encoding_default = 0,
 407         disp_encoding_8bit,
 408         disp_encoding_32bit
 409       } disp_encoding;
 410
 411     /* Prefer the REX byte in encoding.  */
 412     bfd_boolean rex_encoding;
 413
 414     /* Disable instruction size optimization.  */
 415     bfd_boolean no_optimize;
 416
 417     /* How to encode vector instructions.  */
 418     enum
 419       {
 420         vex_encoding_default = 0,
 421         vex_encoding_vex,
 422         vex_encoding_vex3,
 423         vex_encoding_evex
 424       } vec_encoding;
 425
 426     /* REP prefix.  */
 427     const char *rep_prefix;
 428
 429     /* HLE prefix.  */
 430     const char *hle_prefix;
 431
 432     /* Have BND prefix.  */
 433     const char *bnd_prefix;
 434
 435     /* Have NOTRACK prefix.  */
 436     const char *notrack_prefix;
 437
 438     /* Error message.  Stored as an enum i386_error value.  */
 439     enum i386_error error;
 440   };
 441
 442 typedef struct _i386_insn i386_insn;
 443
 444 /* Link RC type with corresponding string, that'll be looked for in
 445    asm.  */
 446 struct RC_name
 447 {
 448   enum rc_type type;
 449   const char *name;
 450   unsigned int len;
 451 };
 452
     /* One entry per rc_type enumerator.  STRING_COMMA_LEN presumably
        supplies both the NAME and LEN initializers -- confirm against its
        definition.  */
 453 static const struct RC_name RC_NamesTable[] =
 454 {
 455   {  rne, STRING_COMMA_LEN ("rn-sae") },
 456   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 457   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 458   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 459   {  saeonly,  STRING_COMMA_LEN ("sae") },
 460 };
 461
 462 /* List of chars besides those in app.c:symbol_chars that can start an
 463    operand.  Used to prevent the scrubber eating vital white-space.
        '@' is appended when LEX_AT is defined and '?' when LEX_QM is
        defined.  */
 464 const char extra_symbol_chars[] = "*%-([{}"
 465 #ifdef LEX_AT
 466         "@"
 467 #endif
 468 #ifdef LEX_QM
 469         "?"
 470 #endif
 471         ;
 472
     /* Two configurations follow.  On TE_I386AIX, and on ELF targets other
        than the ones excluded below, '/' is a comment character in addition
        to '#', SVR4_COMMENT_CHARS is defined and PREFIX_SEPARATOR is a
        backslash.  Everywhere else only '#' starts a comment and
        PREFIX_SEPARATOR is '/'.  */
 473 #if (defined (TE_I386AIX)                               \
 474      || ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
 475          && !defined (TE_GNU)                           \
 476          && !defined (TE_LINUX)                         \
 477          && !defined (TE_NACL)                          \
 478          && !defined (TE_FreeBSD)                       \
 479          && !defined (TE_DragonFly)                     \
 480          && !defined (TE_NetBSD)))
 481 /* This array holds the chars that always start a comment.  If the
 482    pre-processor is disabled, these aren't very useful.  The option
 483    --divide will remove '/' from this list.  */
 484 const char *i386_comment_chars = "#/";
 485 #define SVR4_COMMENT_CHARS 1
 486 #define PREFIX_SEPARATOR '\\'
 487
 488 #else
 489 const char *i386_comment_chars = "#";
 490 #define PREFIX_SEPARATOR '/'
 491 #endif
 492
 493 /* This array holds the chars that only start a comment at the beginning of
 494    a line.  If the line seems to have the form '# 123 filename'
 495    .line and .file directives will appear in the pre-processed output.
 496    Note that input_file.c hand checks for '#' at the beginning of the
 497    first line of the input file.  This is because the compiler outputs
 498    #NO_APP at the beginning of its output.
 499    Also note that comments started like this one will always work if
 500    '/' isn't otherwise defined.  */
 501 const char line_comment_chars[] = "#/";
 502
     /* Chars that separate statements on one line.  */
 503 const char line_separator_chars[] = ";";
 504
 505 /* Chars that can be used to separate mant from exp in floating point
 506    nums.  */
 507 const char EXP_CHARS[] = "eE";
 508
 509 /* Chars that mean this number is a floating point constant.
 510    As in 0f12.456
 511    or    0d1.2345e12.  */
 512 const char FLT_CHARS[] = "fFdDxX";
 513
 514 /* Tables for lexical analysis.  Each has 256 entries, one per possible
        unsigned char value.  */
 515 static char mnemonic_chars[256];
 516 static char register_chars[256];
 517 static char operand_chars[256];
 518 static char identifier_chars[256];
 519 static char digit_chars[256];
 520
 521 /* Lexical macros.  The table lookups cast their argument to unsigned
        char before indexing; is_space_char only matches a plain space.  */
 522 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 523 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 524 #define is_register_char(x) (register_chars[(unsigned char) x])
 525 #define is_space_char(x) ((x) == ' ')
 526 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 527 #define is_digit_char(x) (digit_chars[(unsigned char) x])
 528
 529 /* All non-digit non-letter characters that may occur in an operand.  */
 530 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 531
 532 /* md_assemble() always leaves the strings it's passed unaltered.  To
 533    effect this we maintain a stack of saved characters that we've smashed
 534    with '\0's (indicating end of strings for various sub-fields of the
 535    assembler instruction).
        END_STRING_AND_SAVE pushes the character at S and overwrites it with
        '\0'; RESTORE_END_STRING pops the most recently saved character back
        into S, so the two must be used in strictly nested pairs.
        NOTE(review): neither macro checks save_stack_p against the bounds
        of the 32-entry save_stack.  */
 536 static char save_stack[32];
 537 static char *save_stack_p;
 538 #define END_STRING_AND_SAVE(s) \
 539         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 540 #define RESTORE_END_STRING(s) \
 541         do { *(s) = *--save_stack_p; } while (0)
 542
 543 /* The instruction we're assembling.  A single file-scope instance.  */
 544 static i386_insn i;
 545
 546 /* Possible templates for current insn.  */
 547 static const templates *current_templates;
 548
 549 /* Per instruction expressionS buffers: max displacements & immediates.  */
 550 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 551 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 552
 553 /* Current operand we are working on.  Starts out as -1.  */
 554 static int this_operand = -1;
 555
 556 /* We support four different modes.  FLAG_CODE variable is used to distinguish
 557    these.
        NOTE(review): enum flag_code below has only three enumerators;
        the "four" in this comment looks stale -- confirm.  */
 558
 559 enum flag_code {
 560         CODE_32BIT,
 561         CODE_16BIT,
 562         CODE_64BIT };
 563
     /* With no initializer, flag_code starts as CODE_32BIT (value 0).  */
 564 static enum flag_code flag_code;
 565 static unsigned int object_64bit;
 566 static unsigned int disallow_64bit_reloc;
 567 static int use_rela_relocations = 0;
 568 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 569 static const char *tls_get_addr;
 570
     /* The ABI selector below is compiled for the object formats and
        targets named in this condition.  */
 571 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 572      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 573      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 574
 575 /* The ELF ABI to use.  */
 576 enum x86_elf_abi
 577 {
 578   I386_ABI,
 579   X86_64_ABI,
 580   X86_64_X32_ABI
 581 };
 582
     /* Defaults to I386_ABI.  */
 583 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 584 #endif
 585
 586 #if defined (TE_PE) || defined (TE_PEP)
 587 /* Use big object file format.  Off by default.  */
 588 static int use_big_obj = 0;
 589 #endif
 590
 591 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 592 /* 1 if generating code for a shared library.  Off by default.  */
 593 static int shared = 0;
 594 #endif
 595
 596 /* 1 for intel syntax,
 597    0 if att syntax.  */
 598 static int intel_syntax = 0;
 599
     /* Which 64-bit ISA flavour to use.  The enumerators start at 1, so
        the uninitialized (zero) value of isa64 matches neither of them.  */
 600 static enum x86_64_isa
 601 {
 602   amd64 = 1,    /* AMD64 ISA.  */
 603   intel64       /* Intel64 ISA.  */
 604 } isa64;
 605
 606 /* 1 for intel mnemonic,
 607    0 if att mnemonic.  The default is the negation of SYSV386_COMPAT.  */
 608 static int intel_mnemonic = !SYSV386_COMPAT;
 609
 610 /* 1 if pseudo registers are permitted.  */
 611 static int allow_pseudo_reg = 0;
 612
 613 /* 1 if register prefix % not required.  */
 614 static int allow_naked_reg = 0;
 615
 616 /* 1 if the assembler should add BND prefix for all control-transferring
 617    instructions supporting it, even if this prefix wasn't specified
 618    explicitly.  */
 619 static int add_bnd_prefix = 0;
 620
 621 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 622 static int allow_index_reg = 0;
 623
 624 /* 1 if the assembler should ignore LOCK prefix, even if it was
 625    specified explicitly.  */
 626 static int omit_lock_prefix = 0;
 627
 628 /* 1 if the assembler should encode lfence, mfence, and sfence as
 629    "lock addl $0, (%{re}sp)".  */
 630 static int avoid_fence = 0;
 631
 632 /* Type of the previous instruction.  Besides the KIND, the record
        carries a segment, a file name, a name string and a line number.  */
 633 static struct
 634   {
 635     segT seg;
 636     const char *file;
 637     const char *name;
 638     unsigned int line;
 639     enum last_insn_kind
 640       {
 641         last_insn_other = 0,
 642         last_insn_directive,
 643         last_insn_prefix
 644       } kind;
 645   } last_insn;
 646
 647 /* 1 if the assembler should generate relax relocations.  The default
        comes from DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
 648
 649 static int generate_relax_relocations
 650   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 651
     /* Severity levels shared by two settings.  sse_check has no
        initializer and therefore starts as check_none (0); operand_check
        starts as check_warning.  */
 652 static enum check_kind
 653   {
 654     check_none = 0,
 655     check_warning,
 656     check_error
 657   }
 658 sse_check, operand_check = check_warning;
 659
 660 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 661 static int align_branch_power = 0;
 662
 663 /* Types of branches to align.  The values are used as bit positions by
        enum align_branch_bit below.  */
 664 enum align_branch_kind
 665   {
 666     align_branch_none = 0,
 667     align_branch_jcc = 1,
 668     align_branch_fused = 2,
 669     align_branch_jmp = 3,
 670     align_branch_call = 4,
 671     align_branch_indirect = 5,
 672     align_branch_ret = 6
 673   };
 674
 675 /* Type bits of branches to align: 1 shifted left by the matching
        align_branch_kind value.  */
 676 enum align_branch_bit
 677   {
 678     align_branch_jcc_bit = 1 << align_branch_jcc,
 679     align_branch_fused_bit = 1 << align_branch_fused,
 680     align_branch_jmp_bit = 1 << align_branch_jmp,
 681     align_branch_call_bit = 1 << align_branch_call,
 682     align_branch_indirect_bit = 1 << align_branch_indirect,
 683     align_branch_ret_bit = 1 << align_branch_ret
 684   };
 685
     /* Mask of align_branch_bit values; by default the jcc, fused and jmp
        bits are set.  */
 686 static unsigned int align_branch = (align_branch_jcc_bit
 687                                     | align_branch_fused_bit
 688                                     | align_branch_jmp_bit);
 689
 690 /* Types of condition jump used by macro-fusion.  The enumerators count
        up from 0 while the base opcodes in the comments step by 2 from
        0x70.  */
 691 enum mf_jcc_kind
 692   {
 693     mf_jcc_jo = 0,  /* base opcode 0x70  */
 694     mf_jcc_jc,      /* base opcode 0x72  */
 695     mf_jcc_je,      /* base opcode 0x74  */
 696     mf_jcc_jna,     /* base opcode 0x76  */
 697     mf_jcc_js,      /* base opcode 0x78  */
 698     mf_jcc_jp,      /* base opcode 0x7a  */
 699     mf_jcc_jl,      /* base opcode 0x7c  */
 700     mf_jcc_jle,     /* base opcode 0x7e  */
 701   };
 702
 703 /* Types of compare flag-modifying instructions used by macro-fusion.
        NOTE(review): the comment on mf_cmp_test_and says "test/cmp" while
        the enumerator name suggests test/and -- confirm which is meant.  */
 704 enum mf_cmp_kind
 705   {
 706     mf_cmp_test_and,  /* test/cmp */
 707     mf_cmp_alu_cmp,  /* add/sub/cmp */
 708     mf_cmp_incdec  /* inc/dec */
 709   };
 710
 711 /* The maximum padding size for fused jcc.  CMP like instruction can
 712    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 713    prefixes.  */
 714 #define MAX_FUSED_JCC_PADDING_SIZE 20
 715
 716 /* The maximum number of prefixes added for an instruction.  */
 717 static unsigned int align_branch_prefix_size = 5;
 718
 719 /* Optimization (each item includes the ones before it):
 720    1. Clear the REX_W bit with register operand if possible.
 721    2. Above plus use 128bit vector instruction to clear the full vector
 722       register.
 723  */
 724 static int optimize = 0;
 725
 726 /* Optimization (each item includes the ones before it):
 727    1. Clear the REX_W bit with register operand if possible.
 728    2. Above plus use 128bit vector instruction to clear the full vector
 729       register.
 730    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 731       "testb $imm7,%r8".
 732  */
 733 static int optimize_for_space = 0;
 734
 735 /* Register prefix used for error message.  */
 736 static const char *register_prefix = "%";
 737
 738 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 739    leave, push, and pop instructions so that gcc has the same stack
 740    frame as in 32 bit mode.  */
 741 static char stackop_size = '\0';
 742
 743 /* Non-zero to optimize code alignment.  Not static, so visible outside
        this file.  */
 744 int optimize_align_code = 1;
 745
 746 /* Non-zero to quieten some warnings.  */
 747 static int quiet_warnings = 0;
 748
 749 /* CPU name.  */
 750 static const char *cpu_arch_name = NULL;
 751 static char *cpu_sub_arch_name = NULL;
 752
 753 /* CPU feature flags.  */
 754 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 755
 756 /* If we have selected a cpu we are generating instructions for.  */
 757 static int cpu_arch_tune_set = 0;
 758
 759 /* Cpu we are generating instructions for.  Not static.  */
 760 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 761
 762 /* CPU feature flags of cpu we are generating instructions for.  */
 763 static i386_cpu_flags cpu_arch_tune_flags;
 764
 765 /* CPU instruction set architecture used.  Not static.  */
 766 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 767
 768 /* CPU feature flags of instruction set architecture used.  Not static.  */
 769 i386_cpu_flags cpu_arch_isa_flags;
 770
 771 /* If set, conditional jumps are not automatically promoted to handle
 772    larger than a byte offset.  */
 773 static unsigned int no_cond_jump_promotion = 0;
 774
 775 /* Encode SSE instructions with VEX prefix.  */
 776 static unsigned int sse2avx;
 777
 778 /* Encode scalar AVX instructions with specific vector length.
        Zero-initialized, i.e. vex128.  */
 779 static enum
 780   {
 781     vex128 = 0,
 782     vex256
 783   } avxscalar;
 784
 785 /* Encode VEX WIG instructions with specific vex.w.
        Zero-initialized, i.e. vexw0.  */
 786 static enum
 787   {
 788     vexw0 = 0,
 789     vexw1
 790   } vexwig;
 791
 792 /* Encode scalar EVEX LIG instructions with specific vector length.
        Zero-initialized, i.e. evexl128.  */
 793 static enum
 794   {
 795     evexl128 = 0,
 796     evexl256,
 797     evexl512
 798   } evexlig;
 799
 800 /* Encode EVEX WIG instructions with specific evex.w.
        Zero-initialized, i.e. evexw0.  */
 801 static enum
 802   {
 803     evexw0 = 0,
 804     evexw1
 805   } evexwig;
 806
 807 /* Value to encode in EVEX RC bits, for SAE-only instructions.
        Defaults to rne.  */
 808 static enum rc_type evexrcig = rne;
 809
 810 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 811 static symbolS *GOT_symbol;
 812
 813 /* The dwarf2 return column, adjusted for 32 or 64 bit.  Not static.  */
 814 unsigned int x86_dwarf2_return_column;
 815
 816 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  Not static.  */
 817 int x86_cie_data_alignment;
 818
 819 /* Interface to relax_segment.
 820    There are 3 major relax states for 386 jump insns because the
 821    different types of jumps add different sizes to frags when we're
 822    figuring out what sort of jump to choose to reach a given label.
 823
 824    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 825    branches which are handled by md_estimate_size_before_relax() and
 826    i386_generic_table_relax_frag().  */
 827
 828 /* Types.  These occupy the bits above the low two of a relax state.  */
 829 #define UNCOND_JUMP 0
 830 #define COND_JUMP 1
 831 #define COND_JUMP86 2
 832 #define BRANCH_PADDING 3
 833 #define BRANCH_PREFIX 4
 834 #define FUSED_JCC_PADDING 5
 835
 836 /* Sizes.  These occupy the low two bits of a relax state:
        SMALL 0, SMALL16 1, BIG 2, BIG16 3.  */
 837 #define CODE16  1
 838 #define SMALL   0
 839 #define SMALL16 (SMALL | CODE16)
 840 #define BIG     2
 841 #define BIG16   (BIG | CODE16)
 842
     /* INLINE already receives a definition near the top of this file, so
        this second guarded definition has no effect when that one is in
        force.  */
 843 #ifndef INLINE
 844 #ifdef __GNUC__
 845 #define INLINE __inline__
 846 #else
 847 #define INLINE
 848 #endif
 849 #endif
 850
     /* ENCODE_RELAX_STATE packs a type and a size as (type << 2) | size.
        TYPE_FROM_RELAX_STATE recovers the type by shifting right by two.
        DISP_SIZE_FROM_RELAX_STATE maps the low two bits to a displacement
        size in bytes: 4 for BIG, 2 for BIG16 and 1 otherwise.  */
 851 #define ENCODE_RELAX_STATE(type, size) \
 852   ((relax_substateT) (((type) << 2) | (size)))
 853 #define TYPE_FROM_RELAX_STATE(s) \
 854   ((s) >> 2)
 855 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 856     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 857
 858 /* This table is used by relax_frag to promote short jumps to long
 859    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 860    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 861    don't allow a short jump in a 32 bit code segment to be promoted to
 862    a 16 bit offset jump because it's slower (requires data size
 863    prefix), and doesn't work, unless the destination is in the bottom
 864    64k of the code segment (The top 16 bits of eip are zeroed).
        Each jump type owns four consecutive entries, in the order SMALL,
        SMALL16, BIG, BIG16, which matches the (type << 2) | size encoding
        produced by ENCODE_RELAX_STATE.  */
 865
 866 const relax_typeS md_relax_table[] =
 867 {
 868   /* The fields are:
 869      1) most positive reach of this state,
 870      2) most negative reach of this state,
 871      3) how many bytes this mode will have in the variable part of the frag
 872      4) which index into the table to try if we can't fit into this one.  */
 873
 874   /* UNCOND_JUMP states.  */
 875   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 876   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 877   /* dword jmp adds 4 bytes to frag:
 878      0 extra opcode bytes, 4 displacement bytes.  */
 879   {0, 0, 4, 0},
 880   /* word jmp adds 2 bytes to frag:
 881      0 extra opcode bytes, 2 displacement bytes.  */
 882   {0, 0, 2, 0},
 883
 884   /* COND_JUMP states.  */
 885   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 886   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 887   /* dword conditionals add 5 bytes to frag:
 888      1 extra opcode byte, 4 displacement bytes.  */
 889   {0, 0, 5, 0},
 890   /* word conditionals add 3 bytes to frag:
 891      1 extra opcode byte, 2 displacement bytes.  */
 892   {0, 0, 3, 0},
 893
 894   /* COND_JUMP86 states.  */
 895   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 896   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 897   /* dword conditionals add 5 bytes to frag:
 898      1 extra opcode byte, 4 displacement bytes.  */
 899   {0, 0, 5, 0},
 900   /* word conditionals add 4 bytes to frag:
 901      1 displacement byte and a 3 byte long branch insn.  */
 902   {0, 0, 4, 0}
 903 };
 904
 905 static const arch_entry cpu_arch[] =
 906 {
 907   /* Do not replace the first two entries - i386_target_format()
 908      relies on them being there in this order.  */
 909   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
 910     CPU_GENERIC32_FLAGS, 0 },
 911   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
 912     CPU_GENERIC64_FLAGS, 0 },
 913   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
 914     CPU_NONE_FLAGS, 0 },
 915   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
 916     CPU_I186_FLAGS, 0 },
 917   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
 918     CPU_I286_FLAGS, 0 },
 919   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
 920     CPU_I386_FLAGS, 0 },
 921   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
 922     CPU_I486_FLAGS, 0 },
 923   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
 924     CPU_I586_FLAGS, 0 },
 925   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
 926     CPU_I686_FLAGS, 0 },
 927   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
 928     CPU_I586_FLAGS, 0 },
 929   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
 930     CPU_PENTIUMPRO_FLAGS, 0 },
 931   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
 932     CPU_P2_FLAGS, 0 },
 933   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
 934     CPU_P3_FLAGS, 0 },
 935   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
 936     CPU_P4_FLAGS, 0 },
 937   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
 938     CPU_CORE_FLAGS, 0 },
 939   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
 940     CPU_NOCONA_FLAGS, 0 },
 941   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
 942     CPU_CORE_FLAGS, 1 },
 943   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
 944     CPU_CORE_FLAGS, 0 },
 945   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
 946     CPU_CORE2_FLAGS, 1 },
 947   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
 948     CPU_CORE2_FLAGS, 0 },
 949   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
 950     CPU_COREI7_FLAGS, 0 },
 951   { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
 952     CPU_L1OM_FLAGS, 0 },
 953   { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
 954     CPU_K1OM_FLAGS, 0 },
 955   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
 956     CPU_IAMCU_FLAGS, 0 },
 957   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
 958     CPU_K6_FLAGS, 0 },
 959   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
 960     CPU_K6_2_FLAGS, 0 },
 961   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
 962     CPU_ATHLON_FLAGS, 0 },
 963   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
 964     CPU_K8_FLAGS, 1 },
 965   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
 966     CPU_K8_FLAGS, 0 },
 967   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
 968     CPU_K8_FLAGS, 0 },
 969   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
 970     CPU_AMDFAM10_FLAGS, 0 },
 971   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
 972     CPU_BDVER1_FLAGS, 0 },
 973   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
 974     CPU_BDVER2_FLAGS, 0 },
 975   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
 976     CPU_BDVER3_FLAGS, 0 },
 977   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
 978     CPU_BDVER4_FLAGS, 0 },
 979   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
 980     CPU_ZNVER1_FLAGS, 0 },
 981   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
 982     CPU_ZNVER2_FLAGS, 0 },
 983   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
 984     CPU_BTVER1_FLAGS, 0 },
 985   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
 986     CPU_BTVER2_FLAGS, 0 },
 987   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
 988     CPU_8087_FLAGS, 0 },
 989   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
 990     CPU_287_FLAGS, 0 },
 991   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
 992     CPU_387_FLAGS, 0 },
 993   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
 994     CPU_687_FLAGS, 0 },
 995   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
 996     CPU_CMOV_FLAGS, 0 },
 997   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
 998     CPU_FXSR_FLAGS, 0 },
 999   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1000     CPU_MMX_FLAGS, 0 },
1001   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1002     CPU_SSE_FLAGS, 0 },
1003   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1004     CPU_SSE2_FLAGS, 0 },
1005   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1006     CPU_SSE3_FLAGS, 0 },
1007   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1008     CPU_SSE4A_FLAGS, 0 },
1009   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1010     CPU_SSSE3_FLAGS, 0 },
1011   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1012     CPU_SSE4_1_FLAGS, 0 },
1013   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1014     CPU_SSE4_2_FLAGS, 0 },
1015   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1016     CPU_SSE4_2_FLAGS, 0 },
1017   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1018     CPU_AVX_FLAGS, 0 },
1019   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1020     CPU_AVX2_FLAGS, 0 },
1021   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1022     CPU_AVX512F_FLAGS, 0 },
1023   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1024     CPU_AVX512CD_FLAGS, 0 },
1025   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1026     CPU_AVX512ER_FLAGS, 0 },
1027   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1028     CPU_AVX512PF_FLAGS, 0 },
1029   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1030     CPU_AVX512DQ_FLAGS, 0 },
1031   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1032     CPU_AVX512BW_FLAGS, 0 },
1033   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1034     CPU_AVX512VL_FLAGS, 0 },
1035   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1036     CPU_VMX_FLAGS, 0 },
1037   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1038     CPU_VMFUNC_FLAGS, 0 },
1039   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1040     CPU_SMX_FLAGS, 0 },
1041   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1042     CPU_XSAVE_FLAGS, 0 },
1043   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1044     CPU_XSAVEOPT_FLAGS, 0 },
1045   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1046     CPU_XSAVEC_FLAGS, 0 },
1047   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1048     CPU_XSAVES_FLAGS, 0 },
1049   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1050     CPU_AES_FLAGS, 0 },
1051   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1052     CPU_PCLMUL_FLAGS, 0 },
1053   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1054     CPU_PCLMUL_FLAGS, 1 },
1055   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1056     CPU_FSGSBASE_FLAGS, 0 },
1057   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1058     CPU_RDRND_FLAGS, 0 },
1059   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1060     CPU_F16C_FLAGS, 0 },
1061   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1062     CPU_BMI2_FLAGS, 0 },
1063   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1064     CPU_FMA_FLAGS, 0 },
1065   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1066     CPU_FMA4_FLAGS, 0 },
1067   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1068     CPU_XOP_FLAGS, 0 },
1069   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1070     CPU_LWP_FLAGS, 0 },
1071   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1072     CPU_MOVBE_FLAGS, 0 },
1073   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1074     CPU_CX16_FLAGS, 0 },
1075   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1076     CPU_EPT_FLAGS, 0 },
1077   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1078     CPU_LZCNT_FLAGS, 0 },
1079   { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1080     CPU_POPCNT_FLAGS, 0 },
1081   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1082     CPU_HLE_FLAGS, 0 },
1083   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1084     CPU_RTM_FLAGS, 0 },
1085   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1086     CPU_INVPCID_FLAGS, 0 },
1087   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1088     CPU_CLFLUSH_FLAGS, 0 },
1089   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1090     CPU_NOP_FLAGS, 0 },
1091   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1092     CPU_SYSCALL_FLAGS, 0 },
1093   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1094     CPU_RDTSCP_FLAGS, 0 },
1095   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1096     CPU_3DNOW_FLAGS, 0 },
1097   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1098     CPU_3DNOWA_FLAGS, 0 },
1099   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1100     CPU_PADLOCK_FLAGS, 0 },
1101   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1102     CPU_SVME_FLAGS, 1 },
1103   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1104     CPU_SVME_FLAGS, 0 },
1105   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1106     CPU_SSE4A_FLAGS, 0 },
1107   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1108     CPU_ABM_FLAGS, 0 },
1109   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1110     CPU_BMI_FLAGS, 0 },
1111   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1112     CPU_TBM_FLAGS, 0 },
1113   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1114     CPU_ADX_FLAGS, 0 },
1115   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1116     CPU_RDSEED_FLAGS, 0 },
1117   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1118     CPU_PRFCHW_FLAGS, 0 },
1119   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1120     CPU_SMAP_FLAGS, 0 },
1121   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1122     CPU_MPX_FLAGS, 0 },
1123   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1124     CPU_SHA_FLAGS, 0 },
1125   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1126     CPU_CLFLUSHOPT_FLAGS, 0 },
1127   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1128     CPU_PREFETCHWT1_FLAGS, 0 },
1129   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1130     CPU_SE1_FLAGS, 0 },
1131   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1132     CPU_CLWB_FLAGS, 0 },
1133   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1134     CPU_AVX512IFMA_FLAGS, 0 },
1135   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1136     CPU_AVX512VBMI_FLAGS, 0 },
1137   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1138     CPU_AVX512_4FMAPS_FLAGS, 0 },
1139   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1140     CPU_AVX512_4VNNIW_FLAGS, 0 },
1141   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1142     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1143   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1144     CPU_AVX512_VBMI2_FLAGS, 0 },
1145   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1146     CPU_AVX512_VNNI_FLAGS, 0 },
1147   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1148     CPU_AVX512_BITALG_FLAGS, 0 },
1149   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1150     CPU_CLZERO_FLAGS, 0 },
1151   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1152     CPU_MWAITX_FLAGS, 0 },
1153   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1154     CPU_OSPKE_FLAGS, 0 },
1155   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1156     CPU_RDPID_FLAGS, 0 },
1157   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1158     CPU_PTWRITE_FLAGS, 0 },
1159   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1160     CPU_IBT_FLAGS, 0 },
1161   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1162     CPU_SHSTK_FLAGS, 0 },
1163   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1164     CPU_GFNI_FLAGS, 0 },
1165   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1166     CPU_VAES_FLAGS, 0 },
1167   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1168     CPU_VPCLMULQDQ_FLAGS, 0 },
1169   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1170     CPU_WBNOINVD_FLAGS, 0 },
1171   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1172     CPU_PCONFIG_FLAGS, 0 },
1173   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1174     CPU_WAITPKG_FLAGS, 0 },
1175   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1176     CPU_CLDEMOTE_FLAGS, 0 },
1177   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1178     CPU_MOVDIRI_FLAGS, 0 },
1179   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1180     CPU_MOVDIR64B_FLAGS, 0 },
1181   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1182     CPU_AVX512_BF16_FLAGS, 0 },
1183   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1184     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1185   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1186     CPU_ENQCMD_FLAGS, 0 },
1187   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1188     CPU_RDPRU_FLAGS, 0 },
1189   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1190     CPU_MCOMMIT_FLAGS, 0 },
1191   { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1192     CPU_SEV_ES_FLAGS, 0 },
1193 };
1194
1195 static const noarch_entry cpu_noarch[] =
1196 {
1197   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1198   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1199   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1200   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1201   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1202   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1203   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1204   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1205   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1206   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1207   { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
1208   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1209   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1210   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1211   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1212   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1213   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1214   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1215   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1216   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1217   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1218   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1219   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1220   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1221   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1222   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1223   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1224   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1225   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1226   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1227   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1228   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1229   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1230   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1231   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1232   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1233   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1234   { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
1235   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1236 };
1237
1238 #ifdef I386COFF
1239 /* Like s_lcomm_internal in gas/read.c but the alignment string
1240    is allowed to be optional.  */
1241
1242 static symbolS *
1243 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1244 {
1245   addressT align = 0;
1246
1247   SKIP_WHITESPACE ();
1248
1249   if (needs_align
1250       && *input_line_pointer == ',')
1251     {
1252       align = parse_align (needs_align - 1);
1253
1254       if (align == (addressT) -1)
1255         return NULL;
1256     }
1257   else
1258     {
1259       if (size >= 8)
1260         align = 3;
1261       else if (size >= 4)
1262         align = 2;
1263       else if (size >= 2)
1264         align = 1;
1265       else
1266         align = 0;
1267     }
1268
1269   bss_alloc (symbolP, size, align);
1270   return symbolP;
1271 }
1272
1273 static void
1274 pe_lcomm (int needs_align)
1275 {
1276   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1277 }
1278 #endif
1279
/* Target-specific pseudo-op (directive) table.

   Each row holds the directive name, the function that handles it and
   an integer value associated with the row (the handlers visible in
   this file, e.g. pe_lcomm, take a single int parameter).  Several
   handlers are shared by more than one directive and told apart by
   that integer.  The table ends with an all-zero row.  */

const pseudo_typeS md_pseudo_table[] =
{
  /* ".align": byte-count flavour unless this is an a.out target or
     USE_ALIGN_PTWO is defined, in which case the power-of-two flavour
     handler is used.  */
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
  /* Non-COFF builds provide ".bss"; I386COFF builds instead provide
     ".lcomm" through pe_lcomm.  */
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  /* Floating-point constants; the character selects the format handed
     to float_cons.  */
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  /* Integer constants: cons with 2 and signed_cons with 4.  */
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  /* Accepted and ignored.  */
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  /* Code-size selection.  ".code64" exists only in BFD64 builds.  */
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  /* Paired on/off directives sharing one handler: 1 for the first
     spelling of each pair, 0 for the second.  */
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  /* set_check is shared: 0 for ".sse_check", 1 for ".operand_check".  */
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
  /* ELF-capable builds add ".largecomm"; other builds register the
     DWARF2 line-number directives here instead.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
#endif
  /* Terminator.  */
  {0, 0, 0}
};
1326
/* For interface with expression ().  This is the input cursor that
   code in this file inspects directly, e.g. to test whether the next
   character is ','.  It is only declared here, not defined.  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static struct hash_control *op_hash;

/* Hash table for register lookup.  */
static struct hash_control *reg_hash;
1335 \f
  /* Various efficient no-op patterns for aligning code labels.
     Note: Don't try to assemble the instructions in the comments.
     0L and 0w are not legal.

     The arrays named f32_N, f16_N and alt_N each hold one N-byte
     instruction.  The *_patt tables further down collect them so that
     index N - 1 selects the N-byte pattern; that is how
     i386_output_nops indexes them.  */
static const unsigned char f32_1[] =
  {0x90};                               /* nop                  */
static const unsigned char f32_2[] =
  {0x66,0x90};                          /* xchg %ax,%ax         */
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
/* Opcode bytes (without the displacement) of the jumps that
   i386_generate_nops emits to skip over long runs of padding.  */
static const unsigned char jump_disp8[] =
  {0xeb};                               /* jmp disp8           */
static const unsigned char jump32_disp32[] =
  {0xe9};                               /* jmp disp32          */
/* Same jump with a 0x66 prefix byte; i386_generate_nops uses this form
   when flag_code is CODE_16BIT.  */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};                          /* jmp disp32          */
/* 32-bit NOPs patterns.  There is no 5-byte pattern, so that slot is
   NULL; i386_output_nops copes with a NULL slot by using the next
   smaller pattern.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  The 1- and 2-byte entries are shared with the
   32-bit table.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  Every size from 1 to 11 bytes is
   present; the 1- and 2-byte entries are shared with f32_patt.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1401
/* Generate COUNT bytes of NOPs at WHERE using the pattern table PATT,
   where PATT[N - 1] is the N-byte NOP (or NULL if there is none) and
   no single NOP may be longer than MAX_SINGLE_NOP_SIZE bytes.

   The longest permitted NOP is written as many times as it fits; any
   remaining bytes are covered by one shorter NOP, or by a shorter NOP
   followed by a one-byte NOP when no pattern of exactly the remaining
   length exists.  A MAX_SINGLE_NOP_SIZE below 1 is a fatal error.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)

{
  const unsigned char *insn;
  char *out = where;
  int tail;
  int todo;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  /* Fall back to the next smaller size if the requested one isn't
     available.  */
  insn = patt[max_single_nop_size - 1];
  if (insn == NULL)
    {
      --max_single_nop_size;
      insn = patt[max_single_nop_size - 1];
    }

  /* Bytes left over once the full-size NOPs have been placed.  */
  tail = count % max_single_nop_size;

  /* The longer NOPs go first.  */
  for (todo = count - tail; todo > 0; todo -= max_single_nop_size)
    {
      memcpy (out, insn, max_single_nop_size);
      out += max_single_nop_size;
    }

  if (tail == 0)
    return;

  insn = patt[tail - 1];
  if (insn != NULL)
    {
      memcpy (out, insn, tail);
      return;
    }

  /* No pattern of exactly TAIL bytes: use the one that is a byte
     shorter and finish with a one-byte NOP.  */
  memcpy (out, patt[tail - 2], tail - 1);
  out[tail - 1] = *patt[0];
}
1453
1454 static INLINE int
1455 fits_in_imm7 (offsetT num)
1456 {
1457   return (num & 0x7f) == num;
1458 }
1459
1460 static INLINE int
1461 fits_in_imm31 (offsetT num)
1462 {
1463   return (num & 0x7fffffff) == num;
1464 }
1465
1466 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1467    single NOP instruction LIMIT.  */
1468
1469 void
1470 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1471 {
1472   const unsigned char *const *patt = NULL;
1473   int max_single_nop_size;
1474   /* Maximum number of NOPs before switching to jump over NOPs.  */
1475   int max_number_of_nops;
1476
1477   switch (fragP->fr_type)
1478     {
1479     case rs_fill_nop:
1480     case rs_align_code:
1481       break;
1482     case rs_machine_dependent:
1483       /* Allow NOP padding for jumps and calls.  */
1484       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1485           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1486         break;
1487       /* Fall through.  */
1488     default:
1489       return;
1490     }
1491
1492   /* We need to decide which NOP sequence to use for 32bit and
1493      64bit. When -mtune= is used:
1494
1495      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1496      PROCESSOR_GENERIC32, f32_patt will be used.
1497      2. For the rest, alt_patt will be used.
1498
1499      When -mtune= isn't used, alt_patt will be used if
1500      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1501      be used.
1502
1503      When -march= or .arch is used, we can't use anything beyond
1504      cpu_arch_isa_flags.   */
1505
1506   if (flag_code == CODE_16BIT)
1507     {
1508       patt = f16_patt;
1509       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1510       /* Limit number of NOPs to 2 in 16-bit mode.  */
1511       max_number_of_nops = 2;
1512     }
1513   else
1514     {
1515       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1516         {
1517           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1518           switch (cpu_arch_tune)
1519             {
1520             case PROCESSOR_UNKNOWN:
1521               /* We use cpu_arch_isa_flags to check if we SHOULD
1522                  optimize with nops.  */
1523               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1524                 patt = alt_patt;
1525               else
1526                 patt = f32_patt;
1527               break;
1528             case PROCESSOR_PENTIUM4:
1529             case PROCESSOR_NOCONA:
1530             case PROCESSOR_CORE:
1531             case PROCESSOR_CORE2:
1532             case PROCESSOR_COREI7:
1533             case PROCESSOR_L1OM:
1534             case PROCESSOR_K1OM:
1535             case PROCESSOR_GENERIC64:
1536             case PROCESSOR_K6:
1537             case PROCESSOR_ATHLON:
1538             case PROCESSOR_K8:
1539             case PROCESSOR_AMDFAM10:
1540             case PROCESSOR_BD:
1541             case PROCESSOR_ZNVER:
1542             case PROCESSOR_BT:
1543               patt = alt_patt;
1544               break;
1545             case PROCESSOR_I386:
1546             case PROCESSOR_I486:
1547             case PROCESSOR_PENTIUM:
1548             case PROCESSOR_PENTIUMPRO:
1549             case PROCESSOR_IAMCU:
1550             case PROCESSOR_GENERIC32:
1551               patt = f32_patt;
1552               break;
1553             }
1554         }
1555       else
1556         {
1557           switch (fragP->tc_frag_data.tune)
1558             {
1559             case PROCESSOR_UNKNOWN:
1560               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1561                  PROCESSOR_UNKNOWN.  */
1562               abort ();
1563               break;
1564
1565             case PROCESSOR_I386:
1566             case PROCESSOR_I486:
1567             case PROCESSOR_PENTIUM:
1568             case PROCESSOR_IAMCU:
1569             case PROCESSOR_K6:
1570             case PROCESSOR_ATHLON:
1571             case PROCESSOR_K8:
1572             case PROCESSOR_AMDFAM10:
1573             case PROCESSOR_BD:
1574             case PROCESSOR_ZNVER:
1575             case PROCESSOR_BT:
1576             case PROCESSOR_GENERIC32:
1577               /* We use cpu_arch_isa_flags to check if we CAN optimize
1578                  with nops.  */
1579               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1580                 patt = alt_patt;
1581               else
1582                 patt = f32_patt;
1583               break;
1584             case PROCESSOR_PENTIUMPRO:
1585             case PROCESSOR_PENTIUM4:
1586             case PROCESSOR_NOCONA:
1587             case PROCESSOR_CORE:
1588             case PROCESSOR_CORE2:
1589             case PROCESSOR_COREI7:
1590             case PROCESSOR_L1OM:
1591             case PROCESSOR_K1OM:
1592               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1593                 patt = alt_patt;
1594               else
1595                 patt = f32_patt;
1596               break;
1597             case PROCESSOR_GENERIC64:
1598               patt = alt_patt;
1599               break;
1600             }
1601         }
1602
1603       if (patt == f32_patt)
1604         {
1605           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1606           /* Limit number of NOPs to 2 for older processors.  */
1607           max_number_of_nops = 2;
1608         }
1609       else
1610         {
1611           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1612           /* Limit number of NOPs to 7 for newer processors.  */
1613           max_number_of_nops = 7;
1614         }
1615     }
1616
1617   if (limit == 0)
1618     limit = max_single_nop_size;
1619
1620   if (fragP->fr_type == rs_fill_nop)
1621     {
1622       /* Output NOPs for .nop directive.  */
1623       if (limit > max_single_nop_size)
1624         {
1625           as_bad_where (fragP->fr_file, fragP->fr_line,
1626                         _("invalid single nop size: %d "
1627                           "(expect within [0, %d])"),
1628                         limit, max_single_nop_size);
1629           return;
1630         }
1631     }
1632   else if (fragP->fr_type != rs_machine_dependent)
1633     fragP->fr_var = count;
1634
1635   if ((count / max_single_nop_size) > max_number_of_nops)
1636     {
1637       /* Generate jump over NOPs.  */
1638       offsetT disp = count - 2;
1639       if (fits_in_imm7 (disp))
1640         {
1641           /* Use "jmp disp8" if possible.  */
1642           count = disp;
1643           where[0] = jump_disp8[0];
1644           where[1] = count;
1645           where += 2;
1646         }
1647       else
1648         {
1649           unsigned int size_of_jump;
1650
1651           if (flag_code == CODE_16BIT)
1652             {
1653               where[0] = jump16_disp32[0];
1654               where[1] = jump16_disp32[1];
1655               size_of_jump = 2;
1656             }
1657           else
1658             {
1659               where[0] = jump32_disp32[0];
1660               size_of_jump = 1;
1661             }
1662
1663           count -= size_of_jump + 4;
1664           if (!fits_in_imm31 (count))
1665             {
1666               as_bad_where (fragP->fr_file, fragP->fr_line,
1667                             _("jump over nop padding out of range"));
1668               return;
1669             }
1670
1671           md_number_to_chars (where + size_of_jump, count, 4);
1672           where += size_of_jump + 4;
1673         }
1674     }
1675
1676   /* Generate multiple NOPs.  */
1677   i386_output_nops (where, patt, count, limit);
1678 }
1679
1680 static INLINE int
1681 operand_type_all_zero (const union i386_operand_type *x)
1682 {
1683   switch (ARRAY_SIZE(x->array))
1684     {
1685     case 3:
1686       if (x->array[2])
1687         return 0;
1688       /* Fall through.  */
1689     case 2:
1690       if (x->array[1])
1691         return 0;
1692       /* Fall through.  */
1693     case 1:
1694       return !x->array[0];
1695     default:
1696       abort ();
1697     }
1698 }
1699
1700 static INLINE void
1701 operand_type_set (union i386_operand_type *x, unsigned int v)
1702 {
1703   switch (ARRAY_SIZE(x->array))
1704     {
1705     case 3:
1706       x->array[2] = v;
1707       /* Fall through.  */
1708     case 2:
1709       x->array[1] = v;
1710       /* Fall through.  */
1711     case 1:
1712       x->array[0] = v;
1713       /* Fall through.  */
1714       break;
1715     default:
1716       abort ();
1717     }
1718
1719   x->bitfield.class = ClassNone;
1720   x->bitfield.instance = InstanceNone;
1721 }
1722
1723 static INLINE int
1724 operand_type_equal (const union i386_operand_type *x,
1725                     const union i386_operand_type *y)
1726 {
1727   switch (ARRAY_SIZE(x->array))
1728     {
1729     case 3:
1730       if (x->array[2] != y->array[2])
1731         return 0;
1732       /* Fall through.  */
1733     case 2:
1734       if (x->array[1] != y->array[1])
1735         return 0;
1736       /* Fall through.  */
1737     case 1:
1738       return x->array[0] == y->array[0];
1739       break;
1740     default:
1741       abort ();
1742     }
1743 }
1744
1745 static INLINE int
1746 cpu_flags_all_zero (const union i386_cpu_flags *x)
1747 {
1748   switch (ARRAY_SIZE(x->array))
1749     {
1750     case 4:
1751       if (x->array[3])
1752         return 0;
1753       /* Fall through.  */
1754     case 3:
1755       if (x->array[2])
1756         return 0;
1757       /* Fall through.  */
1758     case 2:
1759       if (x->array[1])
1760         return 0;
1761       /* Fall through.  */
1762     case 1:
1763       return !x->array[0];
1764     default:
1765       abort ();
1766     }
1767 }
1768
1769 static INLINE int
1770 cpu_flags_equal (const union i386_cpu_flags *x,
1771                  const union i386_cpu_flags *y)
1772 {
1773   switch (ARRAY_SIZE(x->array))
1774     {
1775     case 4:
1776       if (x->array[3] != y->array[3])
1777         return 0;
1778       /* Fall through.  */
1779     case 3:
1780       if (x->array[2] != y->array[2])
1781         return 0;
1782       /* Fall through.  */
1783     case 2:
1784       if (x->array[1] != y->array[1])
1785         return 0;
1786       /* Fall through.  */
1787     case 1:
1788       return x->array[0] == y->array[0];
1789       break;
1790     default:
1791       abort ();
1792     }
1793 }
1794
1795 static INLINE int
1796 cpu_flags_check_cpu64 (i386_cpu_flags f)
1797 {
1798   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1799            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1800 }
1801
1802 static INLINE i386_cpu_flags
1803 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1804 {
1805   switch (ARRAY_SIZE (x.array))
1806     {
1807     case 4:
1808       x.array [3] &= y.array [3];
1809       /* Fall through.  */
1810     case 3:
1811       x.array [2] &= y.array [2];
1812       /* Fall through.  */
1813     case 2:
1814       x.array [1] &= y.array [1];
1815       /* Fall through.  */
1816     case 1:
1817       x.array [0] &= y.array [0];
1818       break;
1819     default:
1820       abort ();
1821     }
1822   return x;
1823 }
1824
1825 static INLINE i386_cpu_flags
1826 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1827 {
1828   switch (ARRAY_SIZE (x.array))
1829     {
1830     case 4:
1831       x.array [3] |= y.array [3];
1832       /* Fall through.  */
1833     case 3:
1834       x.array [2] |= y.array [2];
1835       /* Fall through.  */
1836     case 2:
1837       x.array [1] |= y.array [1];
1838       /* Fall through.  */
1839     case 1:
1840       x.array [0] |= y.array [0];
1841       break;
1842     default:
1843       abort ();
1844     }
1845   return x;
1846 }
1847
1848 static INLINE i386_cpu_flags
1849 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1850 {
1851   switch (ARRAY_SIZE (x.array))
1852     {
1853     case 4:
1854       x.array [3] &= ~y.array [3];
1855       /* Fall through.  */
1856     case 3:
1857       x.array [2] &= ~y.array [2];
1858       /* Fall through.  */
1859     case 2:
1860       x.array [1] &= ~y.array [1];
1861       /* Fall through.  */
1862     case 1:
1863       x.array [0] &= ~y.array [0];
1864       break;
1865     default:
1866       abort ();
1867     }
1868   return x;
1869 }
1870
1871 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1872
1873 #define CPU_FLAGS_ARCH_MATCH            0x1
1874 #define CPU_FLAGS_64BIT_MATCH           0x2
1875
1876 #define CPU_FLAGS_PERFECT_MATCH \
1877   (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1878
1879 /* Return CPU flags match bits. */
1880
1881 static int
1882 cpu_flags_match (const insn_template *t)
1883 {
1884   i386_cpu_flags x = t->cpu_flags;
1885   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1886
1887   x.bitfield.cpu64 = 0;
1888   x.bitfield.cpuno64 = 0;
1889
1890   if (cpu_flags_all_zero (&x))
1891     {
1892       /* This instruction is available on all archs.  */
1893       match |= CPU_FLAGS_ARCH_MATCH;
1894     }
1895   else
1896     {
1897       /* This instruction is available only on some archs.  */
1898       i386_cpu_flags cpu = cpu_arch_flags;
1899
1900       /* AVX512VL is no standalone feature - match it and then strip it.  */
1901       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1902         return match;
1903       x.bitfield.cpuavx512vl = 0;
1904
1905       cpu = cpu_flags_and (x, cpu);
1906       if (!cpu_flags_all_zero (&cpu))
1907         {
1908           if (x.bitfield.cpuavx)
1909             {
1910               /* We need to check a few extra flags with AVX.  */
1911               if (cpu.bitfield.cpuavx
1912                   && (!t->opcode_modifier.sse2avx || sse2avx)
1913                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1914                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1915                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1916                 match |= CPU_FLAGS_ARCH_MATCH;
1917             }
1918           else if (x.bitfield.cpuavx512f)
1919             {
1920               /* We need to check a few extra flags with AVX512F.  */
1921               if (cpu.bitfield.cpuavx512f
1922                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1923                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1924                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1925                 match |= CPU_FLAGS_ARCH_MATCH;
1926             }
1927           else
1928             match |= CPU_FLAGS_ARCH_MATCH;
1929         }
1930     }
1931   return match;
1932 }
1933
1934 static INLINE i386_operand_type
1935 operand_type_and (i386_operand_type x, i386_operand_type y)
1936 {
1937   if (x.bitfield.class != y.bitfield.class)
1938     x.bitfield.class = ClassNone;
1939   if (x.bitfield.instance != y.bitfield.instance)
1940     x.bitfield.instance = InstanceNone;
1941
1942   switch (ARRAY_SIZE (x.array))
1943     {
1944     case 3:
1945       x.array [2] &= y.array [2];
1946       /* Fall through.  */
1947     case 2:
1948       x.array [1] &= y.array [1];
1949       /* Fall through.  */
1950     case 1:
1951       x.array [0] &= y.array [0];
1952       break;
1953     default:
1954       abort ();
1955     }
1956   return x;
1957 }
1958
1959 static INLINE i386_operand_type
1960 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1961 {
1962   gas_assert (y.bitfield.class == ClassNone);
1963   gas_assert (y.bitfield.instance == InstanceNone);
1964
1965   switch (ARRAY_SIZE (x.array))
1966     {
1967     case 3:
1968       x.array [2] &= ~y.array [2];
1969       /* Fall through.  */
1970     case 2:
1971       x.array [1] &= ~y.array [1];
1972       /* Fall through.  */
1973     case 1:
1974       x.array [0] &= ~y.array [0];
1975       break;
1976     default:
1977       abort ();
1978     }
1979   return x;
1980 }
1981
1982 static INLINE i386_operand_type
1983 operand_type_or (i386_operand_type x, i386_operand_type y)
1984 {
1985   gas_assert (x.bitfield.class == ClassNone ||
1986               y.bitfield.class == ClassNone ||
1987               x.bitfield.class == y.bitfield.class);
1988   gas_assert (x.bitfield.instance == InstanceNone ||
1989               y.bitfield.instance == InstanceNone ||
1990               x.bitfield.instance == y.bitfield.instance);
1991
1992   switch (ARRAY_SIZE (x.array))
1993     {
1994     case 3:
1995       x.array [2] |= y.array [2];
1996       /* Fall through.  */
1997     case 2:
1998       x.array [1] |= y.array [1];
1999       /* Fall through.  */
2000     case 1:
2001       x.array [0] |= y.array [0];
2002       break;
2003     default:
2004       abort ();
2005     }
2006   return x;
2007 }
2008
2009 static INLINE i386_operand_type
2010 operand_type_xor (i386_operand_type x, i386_operand_type y)
2011 {
2012   gas_assert (y.bitfield.class == ClassNone);
2013   gas_assert (y.bitfield.instance == InstanceNone);
2014
2015   switch (ARRAY_SIZE (x.array))
2016     {
2017     case 3:
2018       x.array [2] ^= y.array [2];
2019       /* Fall through.  */
2020     case 2:
2021       x.array [1] ^= y.array [1];
2022       /* Fall through.  */
2023     case 1:
2024       x.array [0] ^= y.array [0];
2025       break;
2026     default:
2027       abort ();
2028     }
2029   return x;
2030 }
2031
2032 static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
2033 static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
2034 static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
2035 static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
2036 static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
2037 static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
2038 static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
2039 static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
2040 static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
2041 static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
2042 static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
2043 static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
2044 static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
2045 static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
2046 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
2047 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
2048 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2049
/* Operand categories that operand_type_check can test for.  */
enum operand_type
{
  reg = 0,      /* Operand of class Reg.  */
  imm = 1,      /* Immediate of any width.  */
  disp = 2,     /* Displacement of any width.  */
  anymem = 3    /* Displacement of any width, or base/index addressing.  */
};
2057
2058 static INLINE int
2059 operand_type_check (i386_operand_type t, enum operand_type c)
2060 {
2061   switch (c)
2062     {
2063     case reg:
2064       return t.bitfield.class == Reg;
2065
2066     case imm:
2067       return (t.bitfield.imm8
2068               || t.bitfield.imm8s
2069               || t.bitfield.imm16
2070               || t.bitfield.imm32
2071               || t.bitfield.imm32s
2072               || t.bitfield.imm64);
2073
2074     case disp:
2075       return (t.bitfield.disp8
2076               || t.bitfield.disp16
2077               || t.bitfield.disp32
2078               || t.bitfield.disp32s
2079               || t.bitfield.disp64);
2080
2081     case anymem:
2082       return (t.bitfield.disp8
2083               || t.bitfield.disp16
2084               || t.bitfield.disp32
2085               || t.bitfield.disp32s
2086               || t.bitfield.disp64
2087               || t.bitfield.baseindex);
2088
2089     default:
2090       abort ();
2091     }
2092
2093   return 0;
2094 }
2095
2096 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2097    between operand GIVEN and opeand WANTED for instruction template T.  */
2098
2099 static INLINE int
2100 match_operand_size (const insn_template *t, unsigned int wanted,
2101                     unsigned int given)
2102 {
2103   return !((i.types[given].bitfield.byte
2104             && !t->operand_types[wanted].bitfield.byte)
2105            || (i.types[given].bitfield.word
2106                && !t->operand_types[wanted].bitfield.word)
2107            || (i.types[given].bitfield.dword
2108                && !t->operand_types[wanted].bitfield.dword)
2109            || (i.types[given].bitfield.qword
2110                && !t->operand_types[wanted].bitfield.qword)
2111            || (i.types[given].bitfield.tbyte
2112                && !t->operand_types[wanted].bitfield.tbyte));
2113 }
2114
2115 /* Return 1 if there is no conflict in SIMD register between operand
2116    GIVEN and opeand WANTED for instruction template T.  */
2117
2118 static INLINE int
2119 match_simd_size (const insn_template *t, unsigned int wanted,
2120                  unsigned int given)
2121 {
2122   return !((i.types[given].bitfield.xmmword
2123             && !t->operand_types[wanted].bitfield.xmmword)
2124            || (i.types[given].bitfield.ymmword
2125                && !t->operand_types[wanted].bitfield.ymmword)
2126            || (i.types[given].bitfield.zmmword
2127                && !t->operand_types[wanted].bitfield.zmmword));
2128 }
2129
2130 /* Return 1 if there is no conflict in any size between operand GIVEN
2131    and opeand WANTED for instruction template T.  */
2132
2133 static INLINE int
2134 match_mem_size (const insn_template *t, unsigned int wanted,
2135                 unsigned int given)
2136 {
2137   return (match_operand_size (t, wanted, given)
2138           && !((i.types[given].bitfield.unspecified
2139                 && !i.broadcast
2140                 && !t->operand_types[wanted].bitfield.unspecified)
2141                || (i.types[given].bitfield.fword
2142                    && !t->operand_types[wanted].bitfield.fword)
2143                /* For scalar opcode templates to allow register and memory
2144                   operands at the same time, some special casing is needed
2145                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2146                   down-conversion vpmov*.  */
2147                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2148                     && !t->opcode_modifier.broadcast
2149                     && (t->operand_types[wanted].bitfield.byte
2150                         || t->operand_types[wanted].bitfield.word
2151                         || t->operand_types[wanted].bitfield.dword
2152                         || t->operand_types[wanted].bitfield.qword))
2153                    ? (i.types[given].bitfield.xmmword
2154                       || i.types[given].bitfield.ymmword
2155                       || i.types[given].bitfield.zmmword)
2156                    : !match_simd_size(t, wanted, given))));
2157 }
2158
/* Bits of the value returned by operand_size_match.  MATCH_STRAIGHT is
   set if there is no size conflict on any operand for the instruction
   template as written; MATCH_REVERSE is set if there is no size conflict
   for the template with its operands reversed (and the template allows
   for reversing in the first place).  */

#define MATCH_STRAIGHT (1 << 0)
#define MATCH_REVERSE  (1 << 1)
2166
2167 static INLINE unsigned int
2168 operand_size_match (const insn_template *t)
2169 {
2170   unsigned int j, match = MATCH_STRAIGHT;
2171
2172   /* Don't check non-absolute jump instructions.  */
2173   if (t->opcode_modifier.jump
2174       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2175     return match;
2176
2177   /* Check memory and accumulator operand size.  */
2178   for (j = 0; j < i.operands; j++)
2179     {
2180       if (i.types[j].bitfield.class != Reg
2181           && i.types[j].bitfield.class != RegSIMD
2182           && t->opcode_modifier.anysize)
2183         continue;
2184
2185       if (t->operand_types[j].bitfield.class == Reg
2186           && !match_operand_size (t, j, j))
2187         {
2188           match = 0;
2189           break;
2190         }
2191
2192       if (t->operand_types[j].bitfield.class == RegSIMD
2193           && !match_simd_size (t, j, j))
2194         {
2195           match = 0;
2196           break;
2197         }
2198
2199       if (t->operand_types[j].bitfield.instance == Accum
2200           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2201         {
2202           match = 0;
2203           break;
2204         }
2205
2206       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2207         {
2208           match = 0;
2209           break;
2210         }
2211     }
2212
2213   if (!t->opcode_modifier.d)
2214     {
2215     mismatch:
2216       if (!match)
2217         i.error = operand_size_mismatch;
2218       return match;
2219     }
2220
2221   /* Check reverse.  */
2222   gas_assert (i.operands >= 2 && i.operands <= 3);
2223
2224   for (j = 0; j < i.operands; j++)
2225     {
2226       unsigned int given = i.operands - j - 1;
2227
2228       if (t->operand_types[j].bitfield.class == Reg
2229           && !match_operand_size (t, j, given))
2230         goto mismatch;
2231
2232       if (t->operand_types[j].bitfield.class == RegSIMD
2233           && !match_simd_size (t, j, given))
2234         goto mismatch;
2235
2236       if (t->operand_types[j].bitfield.instance == Accum
2237           && (!match_operand_size (t, j, given)
2238               || !match_simd_size (t, j, given)))
2239         goto mismatch;
2240
2241       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2242         goto mismatch;
2243     }
2244
2245   return match | MATCH_REVERSE;
2246 }
2247
2248 static INLINE int
2249 operand_type_match (i386_operand_type overlap,
2250                     i386_operand_type given)
2251 {
2252   i386_operand_type temp = overlap;
2253
2254   temp.bitfield.unspecified = 0;
2255   temp.bitfield.byte = 0;
2256   temp.bitfield.word = 0;
2257   temp.bitfield.dword = 0;
2258   temp.bitfield.fword = 0;
2259   temp.bitfield.qword = 0;
2260   temp.bitfield.tbyte = 0;
2261   temp.bitfield.xmmword = 0;
2262   temp.bitfield.ymmword = 0;
2263   temp.bitfield.zmmword = 0;
2264   if (operand_type_all_zero (&temp))
2265     goto mismatch;
2266
2267   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2268     return 1;
2269
2270  mismatch:
2271   i.error = operand_type_mismatch;
2272   return 0;
2273 }
2274
2275 /* If given types g0 and g1 are registers they must be of the same type
2276    unless the expected operand type register overlap is null.
2277    Some Intel syntax memory operand size checking also happens here.  */
2278
2279 static INLINE int
2280 operand_type_register_match (i386_operand_type g0,
2281                              i386_operand_type t0,
2282                              i386_operand_type g1,
2283                              i386_operand_type t1)
2284 {
2285   if (g0.bitfield.class != Reg
2286       && g0.bitfield.class != RegSIMD
2287       && (!operand_type_check (g0, anymem)
2288           || g0.bitfield.unspecified
2289           || (t0.bitfield.class != Reg
2290               && t0.bitfield.class != RegSIMD)))
2291     return 1;
2292
2293   if (g1.bitfield.class != Reg
2294       && g1.bitfield.class != RegSIMD
2295       && (!operand_type_check (g1, anymem)
2296           || g1.bitfield.unspecified
2297           || (t1.bitfield.class != Reg
2298               && t1.bitfield.class != RegSIMD)))
2299     return 1;
2300
2301   if (g0.bitfield.byte == g1.bitfield.byte
2302       && g0.bitfield.word == g1.bitfield.word
2303       && g0.bitfield.dword == g1.bitfield.dword
2304       && g0.bitfield.qword == g1.bitfield.qword
2305       && g0.bitfield.xmmword == g1.bitfield.xmmword
2306       && g0.bitfield.ymmword == g1.bitfield.ymmword
2307       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2308     return 1;
2309
2310   if (!(t0.bitfield.byte & t1.bitfield.byte)
2311       && !(t0.bitfield.word & t1.bitfield.word)
2312       && !(t0.bitfield.dword & t1.bitfield.dword)
2313       && !(t0.bitfield.qword & t1.bitfield.qword)
2314       && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2315       && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2316       && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2317     return 1;
2318
2319   i.error = register_type_mismatch;
2320
2321   return 0;
2322 }
2323
2324 static INLINE unsigned int
2325 register_number (const reg_entry *r)
2326 {
2327   unsigned int nr = r->reg_num;
2328
2329   if (r->reg_flags & RegRex)
2330     nr += 8;
2331
2332   if (r->reg_flags & RegVRex)
2333     nr += 16;
2334
2335   return nr;
2336 }
2337
2338 static INLINE unsigned int
2339 mode_from_disp_size (i386_operand_type t)
2340 {
2341   if (t.bitfield.disp8)
2342     return 1;
2343   else if (t.bitfield.disp16
2344            || t.bitfield.disp32
2345            || t.bitfield.disp32s)
2346     return 2;
2347   else
2348     return 0;
2349 }
2350
2351 static INLINE int
2352 fits_in_signed_byte (addressT num)
2353 {
2354   return num + 0x80 <= 0xff;
2355 }
2356
2357 static INLINE int
2358 fits_in_unsigned_byte (addressT num)
2359 {
2360   return num <= 0xff;
2361 }
2362
2363 static INLINE int
2364 fits_in_unsigned_word (addressT num)
2365 {
2366   return num <= 0xffff;
2367 }
2368
2369 static INLINE int
2370 fits_in_signed_word (addressT num)
2371 {
2372   return num + 0x8000 <= 0xffff;
2373 }
2374
2375 static INLINE int
2376 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2377 {
2378 #ifndef BFD64
2379   return 1;
2380 #else
2381   return num + 0x80000000 <= 0xffffffff;
2382 #endif
2383 }                               /* fits_in_signed_long() */
2384
2385 static INLINE int
2386 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2387 {
2388 #ifndef BFD64
2389   return 1;
2390 #else
2391   return num <= 0xffffffff;
2392 #endif
2393 }                               /* fits_in_unsigned_long() */
2394
2395 static INLINE int
2396 fits_in_disp8 (offsetT num)
2397 {
2398   int shift = i.memshift;
2399   unsigned int mask;
2400
2401   if (shift == -1)
2402     abort ();
2403
2404   mask = (1 << shift) - 1;
2405
2406   /* Return 0 if NUM isn't properly aligned.  */
2407   if ((num & mask))
2408     return 0;
2409
2410   /* Check if NUM will fit in 8bit after shift.  */
2411   return fits_in_signed_byte (num >> shift);
2412 }
2413
2414 static INLINE int
2415 fits_in_imm4 (offsetT num)
2416 {
2417   return (num & 0xf) == num;
2418 }
2419
2420 static i386_operand_type
2421 smallest_imm_type (offsetT num)
2422 {
2423   i386_operand_type t;
2424
2425   operand_type_set (&t, 0);
2426   t.bitfield.imm64 = 1;
2427
2428   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2429     {
2430       /* This code is disabled on the 486 because all the Imm1 forms
2431          in the opcode table are slower on the i486.  They're the
2432          versions with the implicitly specified single-position
2433          displacement, which has another syntax if you really want to
2434          use that form.  */
2435       t.bitfield.imm1 = 1;
2436       t.bitfield.imm8 = 1;
2437       t.bitfield.imm8s = 1;
2438       t.bitfield.imm16 = 1;
2439       t.bitfield.imm32 = 1;
2440       t.bitfield.imm32s = 1;
2441     }
2442   else if (fits_in_signed_byte (num))
2443     {
2444       t.bitfield.imm8 = 1;
2445       t.bitfield.imm8s = 1;
2446       t.bitfield.imm16 = 1;
2447       t.bitfield.imm32 = 1;
2448       t.bitfield.imm32s = 1;
2449     }
2450   else if (fits_in_unsigned_byte (num))
2451     {
2452       t.bitfield.imm8 = 1;
2453       t.bitfield.imm16 = 1;
2454       t.bitfield.imm32 = 1;
2455       t.bitfield.imm32s = 1;
2456     }
2457   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2458     {
2459       t.bitfield.imm16 = 1;
2460       t.bitfield.imm32 = 1;
2461       t.bitfield.imm32s = 1;
2462     }
2463   else if (fits_in_signed_long (num))
2464     {
2465       t.bitfield.imm32 = 1;
2466       t.bitfield.imm32s = 1;
2467     }
2468   else if (fits_in_unsigned_long (num))
2469     t.bitfield.imm32 = 1;
2470
2471   return t;
2472 }
2473
2474 static offsetT
2475 offset_in_range (offsetT val, int size)
2476 {
2477   addressT mask;
2478
2479   switch (size)
2480     {
2481     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2482     case 2: mask = ((addressT) 1 << 16) - 1; break;
2483     case 4: mask = ((addressT) 2 << 31) - 1; break;
2484 #ifdef BFD64
2485     case 8: mask = ((addressT) 2 << 63) - 1; break;
2486 #endif
2487     default: abort ();
2488     }
2489
2490 #ifdef BFD64
2491   /* If BFD64, sign extend val for 32bit address mode.  */
2492   if (flag_code != CODE_64BIT
2493       || i.prefix[ADDR_PREFIX])
2494     if ((val & ~(((addressT) 2 << 31) - 1)) == 0)
2495       val = (val ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2496 #endif
2497
2498   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2499     {
2500       char buf1[40], buf2[40];
2501
2502       sprint_value (buf1, val);
2503       sprint_value (buf2, val & mask);
2504       as_warn (_("%s shortened to %s"), buf1, buf2);
2505     }
2506   return val & mask;
2507 }
2508
/* Result of add_prefix.  PREFIX_EXIST must stay zero: add_prefix tests
   its result for truth to tell success from failure.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,     /* A prefix of the same class is already there.  */
  PREFIX_LOCK = 1,      /* A lock prefix was added.  */
  PREFIX_REP = 2,       /* A rep/repne prefix was added.  */
  PREFIX_DS = 3,        /* A ds prefix was added.  */
  PREFIX_OTHER = 4      /* Some other prefix was added.  */
};
2517
2518 /* Returns
2519    a. PREFIX_EXIST if attempting to add a prefix where one from the
2520    same class already exists.
2521    b. PREFIX_LOCK if lock prefix is added.
2522    c. PREFIX_REP if rep/repne prefix is added.
2523    d. PREFIX_DS if ds prefix is added.
2524    e. PREFIX_OTHER if other prefix is added.
2525  */
2526
2527 static enum PREFIX_GROUP
2528 add_prefix (unsigned int prefix)
2529 {
2530   enum PREFIX_GROUP ret = PREFIX_OTHER;
2531   unsigned int q;
2532
2533   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2534       && flag_code == CODE_64BIT)
2535     {
2536       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2537           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2538           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2539           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2540         ret = PREFIX_EXIST;
2541       q = REX_PREFIX;
2542     }
2543   else
2544     {
2545       switch (prefix)
2546         {
2547         default:
2548           abort ();
2549
2550         case DS_PREFIX_OPCODE:
2551           ret = PREFIX_DS;
2552           /* Fall through.  */
2553         case CS_PREFIX_OPCODE:
2554         case ES_PREFIX_OPCODE:
2555         case FS_PREFIX_OPCODE:
2556         case GS_PREFIX_OPCODE:
2557         case SS_PREFIX_OPCODE:
2558           q = SEG_PREFIX;
2559           break;
2560
2561         case REPNE_PREFIX_OPCODE:
2562         case REPE_PREFIX_OPCODE:
2563           q = REP_PREFIX;
2564           ret = PREFIX_REP;
2565           break;
2566
2567         case LOCK_PREFIX_OPCODE:
2568           q = LOCK_PREFIX;
2569           ret = PREFIX_LOCK;
2570           break;
2571
2572         case FWAIT_OPCODE:
2573           q = WAIT_PREFIX;
2574           break;
2575
2576         case ADDR_PREFIX_OPCODE:
2577           q = ADDR_PREFIX;
2578           break;
2579
2580         case DATA_PREFIX_OPCODE:
2581           q = DATA_PREFIX;
2582           break;
2583         }
2584       if (i.prefix[q] != 0)
2585         ret = PREFIX_EXIST;
2586     }
2587
2588   if (ret)
2589     {
2590       if (!i.prefix[q])
2591         ++i.prefixes;
2592       i.prefix[q] |= prefix;
2593     }
2594   else
2595     as_bad (_("same type of prefix used twice"));
2596
2597   return ret;
2598 }
2599
2600 static void
2601 update_code_flag (int value, int check)
2602 {
2603   PRINTF_LIKE ((*as_error));
2604
2605   flag_code = (enum flag_code) value;
2606   if (flag_code == CODE_64BIT)
2607     {
2608       cpu_arch_flags.bitfield.cpu64 = 1;
2609       cpu_arch_flags.bitfield.cpuno64 = 0;
2610     }
2611   else
2612     {
2613       cpu_arch_flags.bitfield.cpu64 = 0;
2614       cpu_arch_flags.bitfield.cpuno64 = 1;
2615     }
2616   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2617     {
2618       if (check)
2619         as_error = as_fatal;
2620       else
2621         as_error = as_bad;
2622       (*as_error) (_("64bit mode not supported on `%s'."),
2623                    cpu_arch_name ? cpu_arch_name : default_arch);
2624     }
2625   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2626     {
2627       if (check)
2628         as_error = as_fatal;
2629       else
2630         as_error = as_bad;
2631       (*as_error) (_("32bit mode not supported on `%s'."),
2632                    cpu_arch_name ? cpu_arch_name : default_arch);
2633     }
2634   stackop_size = '\0';
2635 }
2636
/* Switch to code size VALUE via update_code_flag with its CHECK argument
   zero, i.e. an unsupported mode is reported with as_bad rather than
   as_fatal.  */

static void
set_code_flag (int value)
{
  update_code_flag (value, 0);
}
2642
2643 static void
2644 set_16bit_gcc_code_flag (int new_code_flag)
2645 {
2646   flag_code = (enum flag_code) new_code_flag;
2647   if (flag_code != CODE_16BIT)
2648     abort ();
2649   cpu_arch_flags.bitfield.cpu64 = 0;
2650   cpu_arch_flags.bitfield.cpuno64 = 1;
2651   stackop_size = LONG_MNEM_SUFFIX;
2652 }
2653
2654 static void
2655 set_intel_syntax (int syntax_flag)
2656 {
2657   /* Find out if register prefixing is specified.  */
2658   int ask_naked_reg = 0;
2659
2660   SKIP_WHITESPACE ();
2661   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2662     {
2663       char *string;
2664       int e = get_symbol_name (&string);
2665
2666       if (strcmp (string, "prefix") == 0)
2667         ask_naked_reg = 1;
2668       else if (strcmp (string, "noprefix") == 0)
2669         ask_naked_reg = -1;
2670       else
2671         as_bad (_("bad argument to syntax directive."));
2672       (void) restore_line_pointer (e);
2673     }
2674   demand_empty_rest_of_line ();
2675
2676   intel_syntax = syntax_flag;
2677
2678   if (ask_naked_reg == 0)
2679     allow_naked_reg = (intel_syntax
2680                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2681   else
2682     allow_naked_reg = (ask_naked_reg < 0);
2683
2684   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2685
2686   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2687   identifier_chars['$'] = intel_syntax ? '$' : 0;
2688   register_prefix = allow_naked_reg ? "" : "%";
2689 }
2690
2691 static void
2692 set_intel_mnemonic (int mnemonic_flag)
2693 {
2694   intel_mnemonic = mnemonic_flag;
2695 }
2696
2697 static void
2698 set_allow_index_reg (int flag)
2699 {
2700   allow_index_reg = flag;
2701 }
2702
2703 static void
2704 set_check (int what)
2705 {
2706   enum check_kind *kind;
2707   const char *str;
2708
2709   if (what)
2710     {
2711       kind = &operand_check;
2712       str = "operand";
2713     }
2714   else
2715     {
2716       kind = &sse_check;
2717       str = "sse";
2718     }
2719
2720   SKIP_WHITESPACE ();
2721
2722   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2723     {
2724       char *string;
2725       int e = get_symbol_name (&string);
2726
2727       if (strcmp (string, "none") == 0)
2728         *kind = check_none;
2729       else if (strcmp (string, "warning") == 0)
2730         *kind = check_warning;
2731       else if (strcmp (string, "error") == 0)
2732         *kind = check_error;
2733       else
2734         as_bad (_("bad argument to %s_check directive."), str);
2735       (void) restore_line_pointer (e);
2736     }
2737   else
2738     as_bad (_("missing argument for %s_check directive"), str);
2739
2740   demand_empty_rest_of_line ();
2741 }
2742
2743 static void
2744 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2745                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2746 {
2747 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2748   static const char *arch;
2749
2750   /* Intel LIOM is only supported on ELF.  */
2751   if (!IS_ELF)
2752     return;
2753
2754   if (!arch)
2755     {
2756       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2757          use default_arch.  */
2758       arch = cpu_arch_name;
2759       if (!arch)
2760         arch = default_arch;
2761     }
2762
2763   /* If we are targeting Intel MCU, we must enable it.  */
2764   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
2765       || new_flag.bitfield.cpuiamcu)
2766     return;
2767
2768   /* If we are targeting Intel L1OM, we must enable it.  */
2769   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
2770       || new_flag.bitfield.cpul1om)
2771     return;
2772
2773   /* If we are targeting Intel K1OM, we must enable it.  */
2774   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
2775       || new_flag.bitfield.cpuk1om)
2776     return;
2777
2778   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2779 #endif
2780 }
2781
2782 static void
2783 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2784 {
2785   SKIP_WHITESPACE ();
2786
2787   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2788     {
2789       char *string;
2790       int e = get_symbol_name (&string);
2791       unsigned int j;
2792       i386_cpu_flags flags;
2793
2794       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2795         {
2796           if (strcmp (string, cpu_arch[j].name) == 0)
2797             {
2798               check_cpu_arch_compatible (string, cpu_arch[j].flags);
2799
2800               if (*string != '.')
2801                 {
2802                   cpu_arch_name = cpu_arch[j].name;
2803                   cpu_sub_arch_name = NULL;
2804                   cpu_arch_flags = cpu_arch[j].flags;
2805                   if (flag_code == CODE_64BIT)
2806                     {
2807                       cpu_arch_flags.bitfield.cpu64 = 1;
2808                       cpu_arch_flags.bitfield.cpuno64 = 0;
2809                     }
2810                   else
2811                     {
2812                       cpu_arch_flags.bitfield.cpu64 = 0;
2813                       cpu_arch_flags.bitfield.cpuno64 = 1;
2814                     }
2815                   cpu_arch_isa = cpu_arch[j].type;
2816                   cpu_arch_isa_flags = cpu_arch[j].flags;
2817                   if (!cpu_arch_tune_set)
2818                     {
2819                       cpu_arch_tune = cpu_arch_isa;
2820                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2821                     }
2822                   break;
2823                 }
2824
2825               flags = cpu_flags_or (cpu_arch_flags,
2826                                     cpu_arch[j].flags);
2827
2828               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2829                 {
2830                   if (cpu_sub_arch_name)
2831                     {
2832                       char *name = cpu_sub_arch_name;
2833                       cpu_sub_arch_name = concat (name,
2834                                                   cpu_arch[j].name,
2835                                                   (const char *) NULL);
2836                       free (name);
2837                     }
2838                   else
2839                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2840                   cpu_arch_flags = flags;
2841                   cpu_arch_isa_flags = flags;
2842                 }
2843               else
2844                 cpu_arch_isa_flags
2845                   = cpu_flags_or (cpu_arch_isa_flags,
2846                                   cpu_arch[j].flags);
2847               (void) restore_line_pointer (e);
2848               demand_empty_rest_of_line ();
2849               return;
2850             }
2851         }
2852
2853       if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2854         {
2855           /* Disable an ISA extension.  */
2856           for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2857             if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2858               {
2859                 flags = cpu_flags_and_not (cpu_arch_flags,
2860                                            cpu_noarch[j].flags);
2861                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2862                   {
2863                     if (cpu_sub_arch_name)
2864                       {
2865                         char *name = cpu_sub_arch_name;
2866                         cpu_sub_arch_name = concat (name, string,
2867                                                     (const char *) NULL);
2868                         free (name);
2869                       }
2870                     else
2871                       cpu_sub_arch_name = xstrdup (string);
2872                     cpu_arch_flags = flags;
2873                     cpu_arch_isa_flags = flags;
2874                   }
2875                 (void) restore_line_pointer (e);
2876                 demand_empty_rest_of_line ();
2877                 return;
2878               }
2879
2880           j = ARRAY_SIZE (cpu_arch);
2881         }
2882
2883       if (j >= ARRAY_SIZE (cpu_arch))
2884         as_bad (_("no such architecture: `%s'"), string);
2885
2886       *input_line_pointer = e;
2887     }
2888   else
2889     as_bad (_("missing cpu architecture"));
2890
2891   no_cond_jump_promotion = 0;
2892   if (*input_line_pointer == ','
2893       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2894     {
2895       char *string;
2896       char e;
2897
2898       ++input_line_pointer;
2899       e = get_symbol_name (&string);
2900
2901       if (strcmp (string, "nojumps") == 0)
2902         no_cond_jump_promotion = 1;
2903       else if (strcmp (string, "jumps") == 0)
2904         ;
2905       else
2906         as_bad (_("no such architecture modifier: `%s'"), string);
2907
2908       (void) restore_line_pointer (e);
2909     }
2910
2911   demand_empty_rest_of_line ();
2912 }
2913
2914 enum bfd_architecture
2915 i386_arch (void)
2916 {
2917   if (cpu_arch_isa == PROCESSOR_L1OM)
2918     {
2919       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2920           || flag_code != CODE_64BIT)
2921         as_fatal (_("Intel L1OM is 64bit ELF only"));
2922       return bfd_arch_l1om;
2923     }
2924   else if (cpu_arch_isa == PROCESSOR_K1OM)
2925     {
2926       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2927           || flag_code != CODE_64BIT)
2928         as_fatal (_("Intel K1OM is 64bit ELF only"));
2929       return bfd_arch_k1om;
2930     }
2931   else if (cpu_arch_isa == PROCESSOR_IAMCU)
2932     {
2933       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2934           || flag_code == CODE_64BIT)
2935         as_fatal (_("Intel MCU is 32bit ELF only"));
2936       return bfd_arch_iamcu;
2937     }
2938   else
2939     return bfd_arch_i386;
2940 }
2941
2942 unsigned long
2943 i386_mach (void)
2944 {
2945   if (!strncmp (default_arch, "x86_64", 6))
2946     {
2947       if (cpu_arch_isa == PROCESSOR_L1OM)
2948         {
2949           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2950               || default_arch[6] != '\0')
2951             as_fatal (_("Intel L1OM is 64bit ELF only"));
2952           return bfd_mach_l1om;
2953         }
2954       else if (cpu_arch_isa == PROCESSOR_K1OM)
2955         {
2956           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2957               || default_arch[6] != '\0')
2958             as_fatal (_("Intel K1OM is 64bit ELF only"));
2959           return bfd_mach_k1om;
2960         }
2961       else if (default_arch[6] == '\0')
2962         return bfd_mach_x86_64;
2963       else
2964         return bfd_mach_x64_32;
2965     }
2966   else if (!strcmp (default_arch, "i386")
2967            || !strcmp (default_arch, "iamcu"))
2968     {
2969       if (cpu_arch_isa == PROCESSOR_IAMCU)
2970         {
2971           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2972             as_fatal (_("Intel MCU is 32bit ELF only"));
2973           return bfd_mach_i386_iamcu;
2974         }
2975       else
2976         return bfd_mach_i386_i386;
2977     }
2978   else
2979     as_fatal (_("unknown architecture"));
2980 }
2981 \f
2982 void
2983 md_begin (void)
2984 {
2985   const char *hash_err;
2986
2987   /* Support pseudo prefixes like {disp32}.  */
2988   lex_type ['{'] = LEX_BEGIN_NAME;
2989
2990   /* Initialize op_hash hash table.  */
2991   op_hash = hash_new ();
2992
2993   {
2994     const insn_template *optab;
2995     templates *core_optab;
2996
2997     /* Setup for loop.  */
2998     optab = i386_optab;
2999     core_optab = XNEW (templates);
3000     core_optab->start = optab;
3001
3002     while (1)
3003       {
3004         ++optab;
3005         if (optab->name == NULL
3006             || strcmp (optab->name, (optab - 1)->name) != 0)
3007           {
3008             /* different name --> ship out current template list;
3009                add to hash table; & begin anew.  */
3010             core_optab->end = optab;
3011             hash_err = hash_insert (op_hash,
3012                                     (optab - 1)->name,
3013                                     (void *) core_optab);
3014             if (hash_err)
3015               {
3016                 as_fatal (_("can't hash %s: %s"),
3017                           (optab - 1)->name,
3018                           hash_err);
3019               }
3020             if (optab->name == NULL)
3021               break;
3022             core_optab = XNEW (templates);
3023             core_optab->start = optab;
3024           }
3025       }
3026   }
3027
3028   /* Initialize reg_hash hash table.  */
3029   reg_hash = hash_new ();
3030   {
3031     const reg_entry *regtab;
3032     unsigned int regtab_size = i386_regtab_size;
3033
3034     for (regtab = i386_regtab; regtab_size--; regtab++)
3035       {
3036         hash_err = hash_insert (reg_hash, regtab->reg_name, (void *) regtab);
3037         if (hash_err)
3038           as_fatal (_("can't hash %s: %s"),
3039                     regtab->reg_name,
3040                     hash_err);
3041       }
3042   }
3043
3044   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3045   {
3046     int c;
3047     char *p;
3048
3049     for (c = 0; c < 256; c++)
3050       {
3051         if (ISDIGIT (c))
3052           {
3053             digit_chars[c] = c;
3054             mnemonic_chars[c] = c;
3055             register_chars[c] = c;
3056             operand_chars[c] = c;
3057           }
3058         else if (ISLOWER (c))
3059           {
3060             mnemonic_chars[c] = c;
3061             register_chars[c] = c;
3062             operand_chars[c] = c;
3063           }
3064         else if (ISUPPER (c))
3065           {
3066             mnemonic_chars[c] = TOLOWER (c);
3067             register_chars[c] = mnemonic_chars[c];
3068             operand_chars[c] = c;
3069           }
3070         else if (c == '{' || c == '}')
3071           {
3072             mnemonic_chars[c] = c;
3073             operand_chars[c] = c;
3074           }
3075
3076         if (ISALPHA (c) || ISDIGIT (c))
3077           identifier_chars[c] = c;
3078         else if (c >= 128)
3079           {
3080             identifier_chars[c] = c;
3081             operand_chars[c] = c;
3082           }
3083       }
3084
3085 #ifdef LEX_AT
3086     identifier_chars['@'] = '@';
3087 #endif
3088 #ifdef LEX_QM
3089     identifier_chars['?'] = '?';
3090     operand_chars['?'] = '?';
3091 #endif
3092     digit_chars['-'] = '-';
3093     mnemonic_chars['_'] = '_';
3094     mnemonic_chars['-'] = '-';
3095     mnemonic_chars['.'] = '.';
3096     identifier_chars['_'] = '_';
3097     identifier_chars['.'] = '.';
3098
3099     for (p = operand_special_chars; *p != '\0'; p++)
3100       operand_chars[(unsigned char) *p] = *p;
3101   }
3102
3103   if (flag_code == CODE_64BIT)
3104     {
3105 #if defined (OBJ_COFF) && defined (TE_PE)
3106       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3107                                   ? 32 : 16);
3108 #else
3109       x86_dwarf2_return_column = 16;
3110 #endif
3111       x86_cie_data_alignment = -8;
3112     }
3113   else
3114     {
3115       x86_dwarf2_return_column = 8;
3116       x86_cie_data_alignment = -4;
3117     }
3118
3119   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3120      can be turned into BRANCH_PREFIX frag.  */
3121   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3122     abort ();
3123 }
3124
3125 void
3126 i386_print_statistics (FILE *file)
3127 {
3128   hash_print_statistics (file, "i386 opcode", op_hash);
3129   hash_print_statistics (file, "i386 register", reg_hash);
3130 }
3131 \f
3132 #ifdef DEBUG386
3133
/* Debugging routines for md_assemble.  These helpers are declared here
   because pi and pte call them before their definitions appear.  All
   of them write to stdout.  */
static void pte (insn_template *);      /* Print an insn template.  */
static void pt (i386_operand_type);     /* Print operand type names.  */
static void pe (expressionS *);         /* Print an expression.  */
static void ps (symbolS *);             /* Print a symbol.  */
3139
3140 static void
3141 pi (const char *line, i386_insn *x)
3142 {
3143   unsigned int j;
3144
3145   fprintf (stdout, "%s: template ", line);
3146   pte (&x->tm);
3147   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3148            x->base_reg ? x->base_reg->reg_name : "none",
3149            x->index_reg ? x->index_reg->reg_name : "none",
3150            x->log2_scale_factor);
3151   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3152            x->rm.mode, x->rm.reg, x->rm.regmem);
3153   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3154            x->sib.base, x->sib.index, x->sib.scale);
3155   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3156            (x->rex & REX_W) != 0,
3157            (x->rex & REX_R) != 0,
3158            (x->rex & REX_X) != 0,
3159            (x->rex & REX_B) != 0);
3160   for (j = 0; j < x->operands; j++)
3161     {
3162       fprintf (stdout, "    #%d:  ", j + 1);
3163       pt (x->types[j]);
3164       fprintf (stdout, "\n");
3165       if (x->types[j].bitfield.class == Reg
3166           || x->types[j].bitfield.class == RegMMX
3167           || x->types[j].bitfield.class == RegSIMD
3168           || x->types[j].bitfield.class == SReg
3169           || x->types[j].bitfield.class == RegCR
3170           || x->types[j].bitfield.class == RegDR
3171           || x->types[j].bitfield.class == RegTR)
3172         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3173       if (operand_type_check (x->types[j], imm))
3174         pe (x->op[j].imms);
3175       if (operand_type_check (x->types[j], disp))
3176         pe (x->op[j].disps);
3177     }
3178 }
3179
3180 static void
3181 pte (insn_template *t)
3182 {
3183   unsigned int j;
3184   fprintf (stdout, " %d operands ", t->operands);
3185   fprintf (stdout, "opcode %x ", t->base_opcode);
3186   if (t->extension_opcode != None)
3187     fprintf (stdout, "ext %x ", t->extension_opcode);
3188   if (t->opcode_modifier.d)
3189     fprintf (stdout, "D");
3190   if (t->opcode_modifier.w)
3191     fprintf (stdout, "W");
3192   fprintf (stdout, "\n");
3193   for (j = 0; j < t->operands; j++)
3194     {
3195       fprintf (stdout, "    #%d type ", j + 1);
3196       pt (t->operand_types[j]);
3197       fprintf (stdout, "\n");
3198     }
3199 }
3200
3201 static void
3202 pe (expressionS *e)
3203 {
3204   fprintf (stdout, "    operation     %d\n", e->X_op);
3205   fprintf (stdout, "    add_number    %ld (%lx)\n",
3206            (long) e->X_add_number, (long) e->X_add_number);
3207   if (e->X_add_symbol)
3208     {
3209       fprintf (stdout, "    add_symbol    ");
3210       ps (e->X_add_symbol);
3211       fprintf (stdout, "\n");
3212     }
3213   if (e->X_op_symbol)
3214     {
3215       fprintf (stdout, "    op_symbol    ");
3216       ps (e->X_op_symbol);
3217       fprintf (stdout, "\n");
3218     }
3219 }
3220
3221 static void
3222 ps (symbolS *s)
3223 {
3224   fprintf (stdout, "%s type %s%s",
3225            S_GET_NAME (s),
3226            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3227            segment_name (S_GET_SEGMENT (s)));
3228 }
3229
3230 static struct type_name
3231   {
3232     i386_operand_type mask;
3233     const char *name;
3234   }
3235 const type_names[] =
3236 {
3237   { OPERAND_TYPE_REG8, "r8" },
3238   { OPERAND_TYPE_REG16, "r16" },
3239   { OPERAND_TYPE_REG32, "r32" },
3240   { OPERAND_TYPE_REG64, "r64" },
3241   { OPERAND_TYPE_ACC8, "acc8" },
3242   { OPERAND_TYPE_ACC16, "acc16" },
3243   { OPERAND_TYPE_ACC32, "acc32" },
3244   { OPERAND_TYPE_ACC64, "acc64" },
3245   { OPERAND_TYPE_IMM8, "i8" },
3246   { OPERAND_TYPE_IMM8, "i8s" },
3247   { OPERAND_TYPE_IMM16, "i16" },
3248   { OPERAND_TYPE_IMM32, "i32" },
3249   { OPERAND_TYPE_IMM32S, "i32s" },
3250   { OPERAND_TYPE_IMM64, "i64" },
3251   { OPERAND_TYPE_IMM1, "i1" },
3252   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3253   { OPERAND_TYPE_DISP8, "d8" },
3254   { OPERAND_TYPE_DISP16, "d16" },
3255   { OPERAND_TYPE_DISP32, "d32" },
3256   { OPERAND_TYPE_DISP32S, "d32s" },
3257   { OPERAND_TYPE_DISP64, "d64" },
3258   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3259   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3260   { OPERAND_TYPE_CONTROL, "control reg" },
3261   { OPERAND_TYPE_TEST, "test reg" },
3262   { OPERAND_TYPE_DEBUG, "debug reg" },
3263   { OPERAND_TYPE_FLOATREG, "FReg" },
3264   { OPERAND_TYPE_FLOATACC, "FAcc" },
3265   { OPERAND_TYPE_SREG, "SReg" },
3266   { OPERAND_TYPE_REGMMX, "rMMX" },
3267   { OPERAND_TYPE_REGXMM, "rXMM" },
3268   { OPERAND_TYPE_REGYMM, "rYMM" },
3269   { OPERAND_TYPE_REGZMM, "rZMM" },
3270   { OPERAND_TYPE_REGMASK, "Mask reg" },
3271 };
3272
3273 static void
3274 pt (i386_operand_type t)
3275 {
3276   unsigned int j;
3277   i386_operand_type a;
3278
3279   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3280     {
3281       a = operand_type_and (t, type_names[j].mask);
3282       if (operand_type_equal (&a, &type_names[j].mask))
3283         fprintf (stdout, "%s, ",  type_names[j].name);
3284     }
3285   fflush (stdout);
3286 }
3287
3288 #endif /* DEBUG386 */
3289 \f
3290 static bfd_reloc_code_real_type
3291 reloc (unsigned int size,
3292        int pcrel,
3293        int sign,
3294        bfd_reloc_code_real_type other)
3295 {
3296   if (other != NO_RELOC)
3297     {
3298       reloc_howto_type *rel;
3299
3300       if (size == 8)
3301         switch (other)
3302           {
3303           case BFD_RELOC_X86_64_GOT32:
3304             return BFD_RELOC_X86_64_GOT64;
3305             break;
3306           case BFD_RELOC_X86_64_GOTPLT64:
3307             return BFD_RELOC_X86_64_GOTPLT64;
3308             break;
3309           case BFD_RELOC_X86_64_PLTOFF64:
3310             return BFD_RELOC_X86_64_PLTOFF64;
3311             break;
3312           case BFD_RELOC_X86_64_GOTPC32:
3313             other = BFD_RELOC_X86_64_GOTPC64;
3314             break;
3315           case BFD_RELOC_X86_64_GOTPCREL:
3316             other = BFD_RELOC_X86_64_GOTPCREL64;
3317             break;
3318           case BFD_RELOC_X86_64_TPOFF32:
3319             other = BFD_RELOC_X86_64_TPOFF64;
3320             break;
3321           case BFD_RELOC_X86_64_DTPOFF32:
3322             other = BFD_RELOC_X86_64_DTPOFF64;
3323             break;
3324           default:
3325             break;
3326           }
3327
3328 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3329       if (other == BFD_RELOC_SIZE32)
3330         {
3331           if (size == 8)
3332             other = BFD_RELOC_SIZE64;
3333           if (pcrel)
3334             {
3335               as_bad (_("there are no pc-relative size relocations"));
3336               return NO_RELOC;
3337             }
3338         }
3339 #endif
3340
3341       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3342       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3343         sign = -1;
3344
3345       rel = bfd_reloc_type_lookup (stdoutput, other);
3346       if (!rel)
3347         as_bad (_("unknown relocation (%u)"), other);
3348       else if (size != bfd_get_reloc_size (rel))
3349         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3350                 bfd_get_reloc_size (rel),
3351                 size);
3352       else if (pcrel && !rel->pc_relative)
3353         as_bad (_("non-pc-relative relocation for pc-relative field"));
3354       else if ((rel->complain_on_overflow == complain_overflow_signed
3355                 && !sign)
3356                || (rel->complain_on_overflow == complain_overflow_unsigned
3357                    && sign > 0))
3358         as_bad (_("relocated field and relocation type differ in signedness"));
3359       else
3360         return other;
3361       return NO_RELOC;
3362     }
3363
3364   if (pcrel)
3365     {
3366       if (!sign)
3367         as_bad (_("there are no unsigned pc-relative relocations"));
3368       switch (size)
3369         {
3370         case 1: return BFD_RELOC_8_PCREL;
3371         case 2: return BFD_RELOC_16_PCREL;
3372         case 4: return BFD_RELOC_32_PCREL;
3373         case 8: return BFD_RELOC_64_PCREL;
3374         }
3375       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3376     }
3377   else
3378     {
3379       if (sign > 0)
3380         switch (size)
3381           {
3382           case 4: return BFD_RELOC_X86_64_32S;
3383           }
3384       else
3385         switch (size)
3386           {
3387           case 1: return BFD_RELOC_8;
3388           case 2: return BFD_RELOC_16;
3389           case 4: return BFD_RELOC_32;
3390           case 8: return BFD_RELOC_64;
3391           }
3392       as_bad (_("cannot do %s %u byte relocation"),
3393               sign > 0 ? "signed" : "unsigned", size);
3394     }
3395
3396   return NO_RELOC;
3397 }
3398
3399 /* Here we decide which fixups can be adjusted to make them relative to
3400    the beginning of the section instead of the symbol.  Basically we need
3401    to make sure that the dynamic relocations are done correctly, so in
3402    some cases we force the original symbol to be used.  */
3403
3404 int
3405 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3406 {
3407 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3408   if (!IS_ELF)
3409     return 1;
3410
3411   /* Don't adjust pc-relative references to merge sections in 64-bit
3412      mode.  */
3413   if (use_rela_relocations
3414       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3415       && fixP->fx_pcrel)
3416     return 0;
3417
3418   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3419      and changed later by validate_fix.  */
3420   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3421       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3422     return 0;
3423
3424   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3425      for size relocations.  */
3426   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3427       || fixP->fx_r_type == BFD_RELOC_SIZE64
3428       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3429       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3430       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3431       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3432       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3433       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3434       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3435       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3436       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3437       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3438       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3439       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3440       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3441       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3442       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3443       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3444       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3445       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3446       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3447       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3448       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3449       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3450       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3451       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3452       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3453       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3454       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3455       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3456       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3457     return 0;
3458 #endif
3459   return 1;
3460 }
3461
/* Classify MNEMONIC for Intel-syntax memory operand sizing.

   Returns 0 for a non-math instruction (anything not starting with
   'f', plus fxsave/fxrstor), 2 for an integer FPU operation ("fi..."),
   3 for a control/state operation (fldcw, fldenv, non-waiting "fn..."
   forms other than fnop, frstor, fsave, fstcw, fstenv, ...) and 1 for
   every other 'f' mnemonic.

   The value is meaningful only for opcodes with (memory) operands, so
   mnemonics of operand-less instructions may be classified wrongly.
   Characters are inspected left to right and never beyond the
   terminating NUL.  */

static int
intel_float_operand (const char *mnemonic)
{
  char second;
  char third;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  second = mnemonic[1];
  /* Only look at the third character when the string is long enough.  */
  third = second != '\0' ? mnemonic[2] : '\0';

  /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
     the fs segment override prefix are not handled because no call
     path can make opcodes without operands get here.  */

  if (second == 'i')
    return 2; /* integer op */

  if (second == 'l')
    {
      /* fldcw/fldenv */
      if (third == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3;
    }
  else if (second == 'n')
    {
      /* Non-waiting control op, except fnop.  */
      if (third != 'o')
        return 3;
    }
  else if (second == 'r')
    {
      /* frstor/frstpm */
      if (third == 's')
        return 3;
    }
  else if (second == 's')
    {
      /* fsave */
      if (third == 'a')
        return 3;

      if (third == 't')
        {
          const char fourth = mnemonic[3];

          /* fstcw, fstdw, fstenv, fsts[gw] */
          if (fourth == 'c' || fourth == 'd'
              || fourth == 'e' || fourth == 's')
            return 3;
        }
    }
  else if (second == 'x')
    {
      /* fxsave/fxrstor are not really math ops.  */
      if (third == 'r' || third == 's')
        return 0;
    }

  return 1;
}
/* Build the VEX (or XOP) prefix for the instruction in `i'.

   T is the template the instruction was matched against; its operand
   types are used to derive the vector length, and T[1] is used as the
   replacement template when operands are swapped on a `load' template.

   On return i.vex.length is 2 or 3 and i.vex.bytes[] holds the prefix
   bytes.  The two operand-swapping optimisations below may also modify
   i.types, i.op, i.rm, i.rex, i.tm and i.vex.register_specifier.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;
  unsigned int implied_prefix;
  unsigned int vector_length;
  unsigned int w;

  /* Check register specifier.  The field holds the inverted 4-bit
     register number; all ones (0xf) when no register is specified.  */
  if (i.vex.register_specifier)
    {
      register_specifier =
        ~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  The 2-byte form is only
     chosen below when REX_B is clear, so an instruction whose only REX
     bit is REX_B is re-encoded in the opposite direction, which turns
     that bit into REX_R.  */
  if (i.reg_operands > 1
      && i.vec_encoding != vex_encoding_vex3
      && i.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_modifier.vexopcode == VEX0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg = i.operands - 1;
      union i386_op temp_op;
      i386_operand_type temp_type;

      /* Exchange the first and the last operand.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[0];
      i.types[0] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[0];
      i.op[0] = temp_op;

      gas_assert (i.rm.mode == 3);

      /* Exchange ModRM.reg and ModRM.rm; XCHG is reused as the
         temporary.  */
      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      /* Select the opcode for the opposite direction: flip the
         direction bit for `d' templates, else take the template that
         follows T.  */
      if (i.tm.opcode_modifier.d)
        i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
                            ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
        i.tm = t[1];
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  */
  if (i.reg_operands >= 3
      && i.vec_encoding != vex_encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      /* Index of the first register operand (immediates come first).  */
      unsigned int xchg = i.operands - i.reg_operands;
      union i386_op temp_op;
      i386_operand_type temp_type;

      gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
                                      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      /* Exchange the two source operands.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[xchg + 1];
      i.types[xchg + 1] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[xchg + 1];
      i.op[xchg + 1] = temp_op;

      /* The register that needed REX_B moves into the register
         specifier field and the former specifier register moves into
         ModRM.rm, so no REX bit is left.  XCHG now holds the full
         number (with bit 3 set) of the old ModRM.rm register.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      /* NOTE(review): this advances the register entry pointer by the
         difference of the two register numbers, which presumably
         relies on same-class registers being consecutive in the
         register table -- confirm against the table layout.  */
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  /* Vector length bit: taken from the avxscalar setting for scalar
     templates, 1 for VEX256 templates, otherwise derived from the
     operands.  */
  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
         operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
        if (t->operand_types[op].bitfield.xmmword
            && t->operand_types[op].bitfield.ymmword
            && i.types[op].bitfield.ymmword)
          {
            vector_length = 1;
            break;
          }
    }

  /* Bits 8-15 of the base opcode hold the legacy prefix byte that the
     VEX prefix encodes in its two low bits.  */
  switch ((i.tm.base_opcode >> 8) & 0xff)
    {
    case 0:
      implied_prefix = 0;
      break;
    case DATA_PREFIX_OPCODE:
      implied_prefix = 1;
      break;
    case REPE_PREFIX_OPCODE:
      implied_prefix = 2;
      break;
    case REPNE_PREFIX_OPCODE:
      implied_prefix = 3;
      break;
    default:
      abort ();
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;

  /* Use 2-byte VEX prefix if possible.  */
  if (w == 0
      && i.vec_encoding != vex_encoding_vex3
      && i.tm.opcode_modifier.vexopcode == VEX0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      unsigned int r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  It is stored inverted.  */
      r = (i.rex & REX_R) ? 0 : 1;
      i.vex.bytes[1] = (r << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | implied_prefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      unsigned int m;

      i.vex.length = 3;

      /* Select the opcode map number and the escape byte: 0xc4 for the
         VEX maps, 0x8f for the XOP maps.  */
      switch (i.tm.opcode_modifier.vexopcode)
        {
        case VEX0F:
          m = 0x1;
          i.vex.bytes[0] = 0xc4;
          break;
        case VEX0F38:
          m = 0x2;
          i.vex.bytes[0] = 0xc4;
          break;
        case VEX0F3A:
          m = 0x3;
          i.vex.bytes[0] = 0xc4;
          break;
        case XOP08:
          m = 0x8;
          i.vex.bytes[0] = 0x8f;
          break;
        case XOP09:
          m = 0x9;
          i.vex.bytes[0] = 0x8f;
          break;
        case XOP0A:
          m = 0xa;
          i.vex.bytes[0] = 0x8f;
          break;
        default:
          abort ();
        }

      /* The high 3 bits of the second VEX byte are 1's complement
         of RXB bits from REX.  */
      i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;

      i.vex.bytes[2] = (w << 7
                        | register_specifier << 3
                        | vector_length << 2
                        | implied_prefix);
    }
}
3721
3722 static INLINE bfd_boolean
3723 is_evex_encoding (const insn_template *t)
3724 {
3725   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3726          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3727          || t->opcode_modifier.sae;
3728 }
3729
3730 static INLINE bfd_boolean
3731 is_any_vex_encoding (const insn_template *t)
3732 {
3733   return t->opcode_modifier.vex || t->opcode_modifier.vexopcode
3734          || is_evex_encoding (t);
3735 }
3736
3737 /* Build the EVEX prefix.  */
3738
3739 static void
3740 build_evex_prefix (void)
3741 {
3742   unsigned int register_specifier;
3743   unsigned int implied_prefix;
3744   unsigned int m, w;
3745   rex_byte vrex_used = 0;
3746
3747   /* Check register specifier.  */
3748   if (i.vex.register_specifier)
3749     {
3750       gas_assert ((i.vrex & REX_X) == 0);
3751
3752       register_specifier = i.vex.register_specifier->reg_num;
3753       if ((i.vex.register_specifier->reg_flags & RegRex))
3754         register_specifier += 8;
3755       /* The upper 16 registers are encoded in the fourth byte of the
3756          EVEX prefix.  */
3757       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3758         i.vex.bytes[3] = 0x8;
3759       register_specifier = ~register_specifier & 0xf;
3760     }
3761   else
3762     {
3763       register_specifier = 0xf;
3764
3765       /* Encode upper 16 vector index register in the fourth byte of
3766          the EVEX prefix.  */
3767       if (!(i.vrex & REX_X))
3768         i.vex.bytes[3] = 0x8;
3769       else
3770         vrex_used |= REX_X;
3771     }
3772
3773   switch ((i.tm.base_opcode >> 8) & 0xff)
3774     {
3775     case 0:
3776       implied_prefix = 0;
3777       break;
3778     case DATA_PREFIX_OPCODE:
3779       implied_prefix = 1;
3780       break;
3781     case REPE_PREFIX_OPCODE:
3782       implied_prefix = 2;
3783       break;
3784     case REPNE_PREFIX_OPCODE:
3785       implied_prefix = 3;
3786       break;
3787     default:
3788       abort ();
3789     }
3790
3791   /* 4 byte EVEX prefix.  */
3792   i.vex.length = 4;
3793   i.vex.bytes[0] = 0x62;
3794
3795   /* mmmm bits.  */
3796   switch (i.tm.opcode_modifier.vexopcode)
3797     {
3798     case VEX0F:
3799       m = 1;
3800       break;
3801     case VEX0F38:
3802       m = 2;
3803       break;
3804     case VEX0F3A:
3805       m = 3;
3806       break;
3807     default:
3808       abort ();
3809       break;
3810     }
3811
3812   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
3813      bits from REX.  */
3814   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
3815
3816   /* The fifth bit of the second EVEX byte is 1's compliment of the
3817      REX_R bit in VREX.  */
3818   if (!(i.vrex & REX_R))
3819     i.vex.bytes[1] |= 0x10;
3820   else
3821     vrex_used |= REX_R;
3822
3823   if ((i.reg_operands + i.imm_operands) == i.operands)
3824     {
3825       /* When all operands are registers, the REX_X bit in REX is not
3826          used.  We reuse it to encode the upper 16 registers, which is
3827          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3828          as 1's compliment.  */
3829       if ((i.vrex & REX_B))
3830         {
3831           vrex_used |= REX_B;
3832           i.vex.bytes[1] &= ~0x40;
3833         }
3834     }
3835
3836   /* EVEX instructions shouldn't need the REX prefix.  */
3837   i.vrex &= ~vrex_used;
3838   gas_assert (i.vrex == 0);
3839
3840   /* Check the REX.W bit and VEXW.  */
3841   if (i.tm.opcode_modifier.vexw == VEXWIG)
3842     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3843   else if (i.tm.opcode_modifier.vexw)
3844     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3845   else
3846     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3847
3848   /* Encode the U bit.  */
3849   implied_prefix |= 0x4;
3850
3851   /* The third byte of the EVEX prefix.  */
3852   i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);
3853
3854   /* The fourth byte of the EVEX prefix.  */
3855   /* The zeroing-masking bit.  */
3856   if (i.mask && i.mask->zeroing)
3857     i.vex.bytes[3] |= 0x80;
3858
3859   /* Don't always set the broadcast bit if there is no RC.  */
3860   if (!i.rounding)
3861     {
3862       /* Encode the vector length.  */
3863       unsigned int vec_length;
3864
3865       if (!i.tm.opcode_modifier.evex
3866           || i.tm.opcode_modifier.evex == EVEXDYN)
3867         {
3868           unsigned int op;
3869
3870           /* Determine vector length from the last multi-length vector
3871              operand.  */
3872           vec_length = 0;
3873           for (op = i.operands; op--;)
3874             if (i.tm.operand_types[op].bitfield.xmmword
3875                 + i.tm.operand_types[op].bitfield.ymmword
3876                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3877               {
3878                 if (i.types[op].bitfield.zmmword)
3879                   {
3880                     i.tm.opcode_modifier.evex = EVEX512;
3881                     break;
3882                   }
3883                 else if (i.types[op].bitfield.ymmword)
3884                   {
3885                     i.tm.opcode_modifier.evex = EVEX256;
3886                     break;
3887                   }
3888                 else if (i.types[op].bitfield.xmmword)
3889                   {
3890                     i.tm.opcode_modifier.evex = EVEX128;
3891                     break;
3892                   }
3893                 else if (i.broadcast && (int) op == i.broadcast->operand)
3894                   {
3895                     switch (i.broadcast->bytes)
3896                       {
3897                         case 64:
3898                           i.tm.opcode_modifier.evex = EVEX512;
3899                           break;
3900                         case 32:
3901                           i.tm.opcode_modifier.evex = EVEX256;
3902                           break;
3903                         case 16:
3904                           i.tm.opcode_modifier.evex = EVEX128;
3905                           break;
3906                         default:
3907                           abort ();
3908                       }
3909                     break;
3910                   }
3911               }
3912
3913           if (op >= MAX_OPERANDS)
3914             abort ();
3915         }
3916
3917       switch (i.tm.opcode_modifier.evex)
3918         {
3919         case EVEXLIG: /* LL' is ignored */
3920           vec_length = evexlig << 5;
3921           break;
3922         case EVEX128:
3923           vec_length = 0 << 5;
3924           break;
3925         case EVEX256:
3926           vec_length = 1 << 5;
3927           break;
3928         case EVEX512:
3929           vec_length = 2 << 5;
3930           break;
3931         default:
3932           abort ();
3933           break;
3934         }
3935       i.vex.bytes[3] |= vec_length;
3936       /* Encode the broadcast bit.  */
3937       if (i.broadcast)
3938         i.vex.bytes[3] |= 0x10;
3939     }
3940   else
3941     {
3942       if (i.rounding->type != saeonly)
3943         i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
3944       else
3945         i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3946     }
3947
3948   if (i.mask && i.mask->mask)
3949     i.vex.bytes[3] |= i.mask->mask->reg_num;
3950 }
3951
3952 static void
3953 process_immext (void)
3954 {
3955   expressionS *exp;
3956
3957   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3958      which is coded in the same place as an 8-bit immediate field
3959      would be.  Here we fake an 8-bit immediate operand from the
3960      opcode suffix stored in tm.extension_opcode.
3961
3962      AVX instructions also use this encoding, for some of
3963      3 argument instructions.  */
3964
3965   gas_assert (i.imm_operands <= 1
3966               && (i.operands <= 2
3967                   || (is_any_vex_encoding (&i.tm)
3968                       && i.operands <= 4)));
3969
3970   exp = &im_expressions[i.imm_operands++];
3971   i.op[i.operands].imms = exp;
3972   i.types[i.operands] = imm8;
3973   i.operands++;
3974   exp->X_op = O_constant;
3975   exp->X_add_number = i.tm.extension_opcode;
3976   i.tm.extension_opcode = None;
3977 }
3978
3979
3980 static int
3981 check_hle (void)
3982 {
3983   switch (i.tm.opcode_modifier.hleprefixok)
3984     {
3985     default:
3986       abort ();
3987     case HLEPrefixNone:
3988       as_bad (_("invalid instruction `%s' after `%s'"),
3989               i.tm.name, i.hle_prefix);
3990       return 0;
3991     case HLEPrefixLock:
3992       if (i.prefix[LOCK_PREFIX])
3993         return 1;
3994       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
3995       return 0;
3996     case HLEPrefixAny:
3997       return 1;
3998     case HLEPrefixRelease:
3999       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4000         {
4001           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4002                   i.tm.name);
4003           return 0;
4004         }
4005       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4006         {
4007           as_bad (_("memory destination needed for instruction `%s'"
4008                     " after `xrelease'"), i.tm.name);
4009           return 0;
4010         }
4011       return 1;
4012     }
4013 }
4014
4015 /* Try the shortest encoding by shortening operand size.

        Operates on the instruction currently held in `i' and rewrites its
        template copy (i.tm) and operand data (i.types, i.op, i.suffix,
        i.memshift) in place.  The candidate transformations below form one
        if / else-if chain, so at most one of them is applied per call; when
        none matches, nothing is changed.  */
4016
4017 static void
4018 optimize_encoding (void)
4019 {
4020   unsigned int j;
4021
       /* Case 1, only with optimize_for_space: a non-VEX insn with opcode
          0xa8 or 0xf6 /0, one non-byte register operand and one constant
          immediate accepted by fits_in_imm7.  */
4022   if (optimize_for_space
4023       && !is_any_vex_encoding (&i.tm)
4024       && i.reg_operands == 1
4025       && i.imm_operands == 1
4026       && !i.types[1].bitfield.byte
4027       && i.op[0].imms->X_op == O_constant
4028       && fits_in_imm7 (i.op[0].imms->X_add_number)
4029       && (i.tm.base_opcode == 0xa8
4030           || (i.tm.base_opcode == 0xf6
4031               && i.tm.extension_opcode == 0x0)))
4032     {
4033       /* Optimize: -Os:
4034            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4035        */
4036       unsigned int base_regnum = i.op[1].regs->reg_num;
           /* Outside of 64-bit mode only register numbers 0-3 are
              converted.  */
4037       if (flag_code == CODE_64BIT || base_regnum < 4)
4038         {
4039           i.types[1].bitfield.byte = 1;
4040           /* Ignore the suffix.  */
4041           i.suffix = 0;
4042           /* Convert to byte registers.  J becomes the number of entries
                  to step back from the current register entry; the values
                  presumably mirror the layout of the register table --
                  confirm against its definition.  */
4043           if (i.types[1].bitfield.word)
4044             j = 16;
4045           else if (i.types[1].bitfield.dword)
4046             j = 32;
4047           else
4048             j = 48;
4049           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4050             j += 8;
4051           i.op[1].regs -= j;
4052         }
4053     }
       /* Case 2, 64-bit mode only: selected non-VEX insns that have a qword
          register operand, either with a suitably small constant immediate
          or with the same register given twice.  */
4054   else if (flag_code == CODE_64BIT
4055            && !is_any_vex_encoding (&i.tm)
4056            && ((i.types[1].bitfield.qword
4057                 && i.reg_operands == 1
4058                 && i.imm_operands == 1
4059                 && i.op[0].imms->X_op == O_constant
4060                 && ((i.tm.base_opcode == 0xb8
4061                      && i.tm.extension_opcode == None
4062                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4063                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4064                         && ((i.tm.base_opcode == 0x24
4065                              || i.tm.base_opcode == 0xa8)
4066                             || (i.tm.base_opcode == 0x80
4067                                 && i.tm.extension_opcode == 0x4)
4068                             || ((i.tm.base_opcode == 0xf6
4069                                  || (i.tm.base_opcode | 1) == 0xc7)
4070                                 && i.tm.extension_opcode == 0x0)))
4071                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4072                         && i.tm.base_opcode == 0x83
4073                         && i.tm.extension_opcode == 0x4)))
4074                || (i.types[0].bitfield.qword
4075                    && ((i.reg_operands == 2
4076                         && i.op[0].regs == i.op[1].regs
4077                         && (i.tm.base_opcode == 0x30
4078                             || i.tm.base_opcode == 0x28))
4079                        || (i.reg_operands == 1
4080                            && i.operands == 1
4081                            && i.tm.base_opcode == 0x30)))))
4082     {
4083       /* Optimize: -O:
4084            andq $imm31, %r64   -> andl $imm31, %r32
4085            andq $imm7, %r64    -> andl $imm7, %r32
4086            testq $imm31, %r64  -> testl $imm31, %r32
4087            xorq %r64, %r64     -> xorl %r32, %r32
4088            subq %r64, %r64     -> subl %r32, %r32
4089            movq $imm31, %r64   -> movl $imm31, %r32
4090            movq $imm32, %r64   -> movl $imm32, %r32
4091         */
           /* NOTE(review): norex64 presumably keeps REX.W from being
              emitted for this insn -- confirm where the flag is consumed.  */
4092       i.tm.opcode_modifier.norex64 = 1;
           /* (base_opcode | 1) == 0xc7 matches both 0xc6 and 0xc7.  */
4093       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4094         {
4095           /* Handle
4096                movq $imm31, %r64   -> movl $imm31, %r32
4097                movq $imm32, %r64   -> movl $imm32, %r32
4098            */
               /* Both the template and the actual operand types are switched
                  to a 32-bit immediate and a dword register.  */
4099           i.tm.operand_types[0].bitfield.imm32 = 1;
4100           i.tm.operand_types[0].bitfield.imm32s = 0;
4101           i.tm.operand_types[0].bitfield.imm64 = 0;
4102           i.types[0].bitfield.imm32 = 1;
4103           i.types[0].bitfield.imm32s = 0;
4104           i.types[0].bitfield.imm64 = 0;
4105           i.types[1].bitfield.dword = 1;
4106           i.types[1].bitfield.qword = 0;
4107           if ((i.tm.base_opcode | 1) == 0xc7)
4108             {
4109               /* Handle
4110                    movq $imm31, %r64   -> movl $imm31, %r32
4111                */
                   /* Turn the template into the 0xb8 form: no extension
                      opcode, no W modifier and no ModRM byte.  */
4112               i.tm.base_opcode = 0xb8;
4113               i.tm.extension_opcode = None;
4114               i.tm.opcode_modifier.w = 0;
4115               i.tm.opcode_modifier.modrm = 0;
4116             }
4117         }
4118     }
       /* Case 3, optimize > 1 and not optimize_for_space: non-VEX insn with
          the same register given for both operands and a base opcode that,
          ignoring the Opcode_D bit and bit 0, is 0x8 or 0x20.  */
4119   else if (optimize > 1
4120            && !optimize_for_space
4121            && !is_any_vex_encoding (&i.tm)
4122            && i.reg_operands == 2
4123            && i.op[0].regs == i.op[1].regs
4124            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4125                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4126            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4127     {
4128       /* Optimize: -O2:
4129            andb %rN, %rN  -> testb %rN, %rN
4130            andw %rN, %rN  -> testw %rN, %rN
4131            andq %rN, %rN  -> testq %rN, %rN
4132            orb %rN, %rN   -> testb %rN, %rN
4133            orw %rN, %rN   -> testw %rN, %rN
4134            orq %rN, %rN   -> testq %rN, %rN
4135
4136            and outside of 64-bit mode
4137
4138            andl %rN, %rN  -> testl %rN, %rN
4139            orl %rN, %rN   -> testl %rN, %rN
4140        */
           /* Bit 0 of the original opcode is carried over into the new
              one.  */
4141       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4142     }
       /* Case 4: three register operands of which the first two are the
          same register, the third not being an xmmword, for the VEX/EVEX
          opcodes listed below.  EVEX templates additionally must have no
          rounding and either no mask or a zeroing one.  */
4143   else if (i.reg_operands == 3
4144            && i.op[0].regs == i.op[1].regs
4145            && !i.types[2].bitfield.xmmword
4146            && (i.tm.opcode_modifier.vex
4147                || ((!i.mask || i.mask->zeroing)
4148                    && !i.rounding
4149                    && is_evex_encoding (&i.tm)
4150                    && (i.vec_encoding != vex_encoding_evex
4151                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4152                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4153                        || (i.tm.operand_types[2].bitfield.zmmword
4154                            && i.types[2].bitfield.ymmword))))
4155            && ((i.tm.base_opcode == 0x55
4156                 || i.tm.base_opcode == 0x6655
4157                 || i.tm.base_opcode == 0x66df
4158                 || i.tm.base_opcode == 0x57
4159                 || i.tm.base_opcode == 0x6657
4160                 || i.tm.base_opcode == 0x66ef
4161                 || i.tm.base_opcode == 0x66f8
4162                 || i.tm.base_opcode == 0x66f9
4163                 || i.tm.base_opcode == 0x66fa
4164                 || i.tm.base_opcode == 0x66fb
4165                 || i.tm.base_opcode == 0x42
4166                 || i.tm.base_opcode == 0x6642
4167                 || i.tm.base_opcode == 0x47
4168                 || i.tm.base_opcode == 0x6647)
4169                && i.tm.extension_opcode == None))
4170     {
4171       /* Optimize: -O1:
4172            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4173            vpsubq and vpsubw:
4174              EVEX VOP %zmmM, %zmmM, %zmmN
4175                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4176                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4177              EVEX VOP %ymmM, %ymmM, %ymmN
4178                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4179                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4180              VEX VOP %ymmM, %ymmM, %ymmN
4181                -> VEX VOP %xmmM, %xmmM, %xmmN
4182            VOP, one of vpandn and vpxor:
4183              VEX VOP %ymmM, %ymmM, %ymmN
4184                -> VEX VOP %xmmM, %xmmM, %xmmN
4185            VOP, one of vpandnd and vpandnq:
4186              EVEX VOP %zmmM, %zmmM, %zmmN
4187                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4188                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4189              EVEX VOP %ymmM, %ymmM, %ymmN
4190                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4191                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4192            VOP, one of vpxord and vpxorq:
4193              EVEX VOP %zmmM, %zmmM, %zmmN
4194                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4195                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4196              EVEX VOP %ymmM, %ymmM, %ymmN
4197                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4198                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4199            VOP, one of kxord and kxorq:
4200              VEX VOP %kM, %kM, %kN
4201                -> VEX kxorw %kM, %kM, %kN
4202            VOP, one of kandnd and kandnq:
4203              VEX VOP %kM, %kM, %kN
4204                -> VEX kandnw %kM, %kM, %kN
4205        */
4206       if (is_evex_encoding (&i.tm))
4207         {
               /* Unless i.vec_encoding is vex_encoding_evex the template is
                  switched to VEX128.  Otherwise it is only narrowed to
                  EVEX128 when optimize > 1, and left alone below that.  */
4208           if (i.vec_encoding != vex_encoding_evex)
4209             {
4210               i.tm.opcode_modifier.vex = VEX128;
4211               i.tm.opcode_modifier.vexw = VEXW0;
4212               i.tm.opcode_modifier.evex = 0;
4213             }
4214           else if (optimize > 1)
4215             i.tm.opcode_modifier.evex = EVEX128;
4216           else
4217             return;
4218         }
4219       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4220         {
               /* Mask register form: keep only the low opcode byte, dropping
                  whatever was held in the bits above it, and use VEXW0.  */
4221           i.tm.base_opcode &= 0xff;
4222           i.tm.opcode_modifier.vexw = VEXW0;
4223         }
4224       else
4225         i.tm.opcode_modifier.vex = VEX128;
4226
           /* For a VEX template mark all three operands as xmmword rather
              than ymmword.  */
4227       if (i.tm.opcode_modifier.vex)
4228         for (j = 0; j < 3; j++)
4229           {
4230             i.types[j].bitfield.xmmword = 1;
4231             i.types[j].bitfield.ymmword = 0;
4232           }
4233     }
       /* Case 5: EVEX template, i.vec_encoding not vex_encoding_evex, no
          zmmword in the first two operands, no mask and no broadcast, for
          the opcodes listed below.  */
4234   else if (i.vec_encoding != vex_encoding_evex
4235            && !i.types[0].bitfield.zmmword
4236            && !i.types[1].bitfield.zmmword
4237            && !i.mask
4238            && !i.broadcast
4239            && is_evex_encoding (&i.tm)
4240            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
4241                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
4242                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
4243                || (i.tm.base_opcode & ~4) == 0x66db
4244                || (i.tm.base_opcode & ~4) == 0x66eb)
4245            && i.tm.extension_opcode == None)
4246     {
4247       /* Optimize: -O1:
4248            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4249            vmovdqu32 and vmovdqu64:
4250              EVEX VOP %xmmM, %xmmN
4251                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4252              EVEX VOP %ymmM, %ymmN
4253                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4254              EVEX VOP %xmmM, mem
4255                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4256              EVEX VOP %ymmM, mem
4257                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4258              EVEX VOP mem, %xmmN
4259                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4260              EVEX VOP mem, %ymmN
4261                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4262            VOP, one of vpand, vpandn, vpor, vpxor:
4263              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4264                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4265              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4266                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4267              EVEX VOP{d,q} mem, %xmmM, %xmmN
4268                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4269              EVEX VOP{d,q} mem, %ymmM, %ymmN
4270                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4271        */
           /* Look for the first operand with a constant displacement.  J is
              left at its index, or at i.operands when there is none.  */
4272       for (j = 0; j < i.operands; j++)
4273         if (operand_type_check (i.types[j], disp)
4274             && i.op[j].disps->X_op == O_constant)
4275           {
4276             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4277                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4278                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4279             int evex_disp8, vex_disp8;
4280             unsigned int memshift = i.memshift;
4281             offsetT n = i.op[j].disps->X_add_number;
4282
                 /* fits_in_disp8 is evaluated once with the current
                    i.memshift and once with i.memshift cleared; presumably
                    it consults i.memshift -- confirm.  If the answers
                    differ, i.memshift is restored and the insn is left
                    unoptimized.  */
4283             evex_disp8 = fits_in_disp8 (n);
4284             i.memshift = 0;
4285             vex_disp8 = fits_in_disp8 (n);
4286             if (evex_disp8 != vex_disp8)
4287               {
4288                 i.memshift = memshift;
4289                 return;
4290               }
4291
4292             i.types[j].bitfield.disp8 = vex_disp8;
4293             break;
4294           }
           /* Turn an opcode whose bits 8-15 hold 0xf2 into its 0xf3
              counterpart, leaving the other opcode bits untouched.  */
4295       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
4296         i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
4297       i.tm.opcode_modifier.vex
4298         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4299       i.tm.opcode_modifier.vexw = VEXW0;
4300       /* VPAND, VPOR, and VPXOR are commutative.  */
4301       if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
4302         i.tm.opcode_modifier.commutative = 1;
           /* Clear the EVEX-related template modifiers and the memory
              shift.  */
4303       i.tm.opcode_modifier.evex = 0;
4304       i.tm.opcode_modifier.masking = 0;
4305       i.tm.opcode_modifier.broadcast = 0;
4306       i.tm.opcode_modifier.disp8memshift = 0;
4307       i.memshift = 0;
           /* J < i.operands only when the loop above stopped at a constant
              displacement operand; recompute its disp8 flag now that
              i.memshift is 0.  */
4308       if (j < i.operands)
4309         i.types[j].bitfield.disp8
4310           = fits_in_disp8 (i.op[j].disps->X_add_number);
4311     }
4312 }
4313
4314 /* This is the guts of the machine-dependent assembler.  LINE points to a
4315    machine dependent instruction.  This function is supposed to emit
4316    the frags/bytes it assembles to.  */
4317
4318 void
4319 md_assemble (char *line)
4320 {
4321   unsigned int j;
4322   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4323   const insn_template *t;
4324
4325   /* Initialize globals.  */
4326   memset (&i, '\0', sizeof (i));
4327   for (j = 0; j < MAX_OPERANDS; j++)
4328     i.reloc[j] = NO_RELOC;
4329   memset (disp_expressions, '\0', sizeof (disp_expressions));
4330   memset (im_expressions, '\0', sizeof (im_expressions));
4331   save_stack_p = save_stack;
4332
4333   /* First parse an instruction mnemonic & call i386_operand for the operands.
4334      We assume that the scrubber has arranged it so that line[0] is the valid
4335      start of a (possibly prefixed) mnemonic.  */
4336
4337   line = parse_insn (line, mnemonic);
4338   if (line == NULL)
4339     return;
4340   mnem_suffix = i.suffix;
4341
4342   line = parse_operands (line, mnemonic);
4343   this_operand = -1;
4344   xfree (i.memop1_string);
4345   i.memop1_string = NULL;
4346   if (line == NULL)
4347     return;
4348
4349   /* Now we've parsed the mnemonic into a set of templates, and have the
4350      operands at hand.  */
4351
4352   /* All Intel opcodes have reversed operands except for "bound", "enter",
4353      "invlpga", "monitor*", "mwait*", "tpause", and "umwait".  We also don't
4354      reverse intersegment "jmp" and "call" instructions with 2 immediate
4355      operands so that the immediate segment precedes the offset, as it does
4356      when in AT&T mode.  */
4357   if (intel_syntax
4358       && i.operands > 1
4359       && (strcmp (mnemonic, "bound") != 0)
4360       && (strcmp (mnemonic, "invlpga") != 0)
4361       && (strncmp (mnemonic, "monitor", 7) != 0)
4362       && (strncmp (mnemonic, "mwait", 5) != 0)
4363       && (strcmp (mnemonic, "tpause") != 0)
4364       && (strcmp (mnemonic, "umwait") != 0)
4365       && !(operand_type_check (i.types[0], imm)
4366            && operand_type_check (i.types[1], imm)))
4367     swap_operands ();
4368
4369   /* The order of the immediates should be reversed
4370      for 2 immediates extrq and insertq instructions */
4371   if (i.imm_operands == 2
4372       && (strcmp (mnemonic, "extrq") == 0
4373           || strcmp (mnemonic, "insertq") == 0))
4374       swap_2_operands (0, 1);
4375
4376   if (i.imm_operands)
4377     optimize_imm ();
4378
4379   /* Don't optimize displacement for movabs since it only takes 64bit
4380      displacement.  */
4381   if (i.disp_operands
4382       && i.disp_encoding != disp_encoding_32bit
4383       && (flag_code != CODE_64BIT
4384           || strcmp (mnemonic, "movabs") != 0))
4385     optimize_disp ();
4386
4387   /* Next, we find a template that matches the given insn,
4388      making sure the overlap of the given operands types is consistent
4389      with the template operand types.  */
4390
4391   if (!(t = match_template (mnem_suffix)))
4392     return;
4393
4394   if (sse_check != check_none
4395       && !i.tm.opcode_modifier.noavx
4396       && !i.tm.cpu_flags.bitfield.cpuavx
4397       && !i.tm.cpu_flags.bitfield.cpuavx512f
4398       && (i.tm.cpu_flags.bitfield.cpusse
4399           || i.tm.cpu_flags.bitfield.cpusse2
4400           || i.tm.cpu_flags.bitfield.cpusse3
4401           || i.tm.cpu_flags.bitfield.cpussse3
4402           || i.tm.cpu_flags.bitfield.cpusse4_1
4403           || i.tm.cpu_flags.bitfield.cpusse4_2
4404           || i.tm.cpu_flags.bitfield.cpusse4a
4405           || i.tm.cpu_flags.bitfield.cpupclmul
4406           || i.tm.cpu_flags.bitfield.cpuaes
4407           || i.tm.cpu_flags.bitfield.cpusha
4408           || i.tm.cpu_flags.bitfield.cpugfni))
4409     {
4410       (sse_check == check_warning
4411        ? as_warn
4412        : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4413     }
4414
4415   if (i.tm.opcode_modifier.fwait)
4416     if (!add_prefix (FWAIT_OPCODE))
4417       return;
4418
4419   /* Check if REP prefix is OK.  */
4420   if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
4421     {
4422       as_bad (_("invalid instruction `%s' after `%s'"),
4423                 i.tm.name, i.rep_prefix);
4424       return;
4425     }
4426
4427   /* Check for lock without a lockable instruction.  Destination operand
4428      must be memory unless it is xchg (0x86).  */
4429   if (i.prefix[LOCK_PREFIX]
4430       && (!i.tm.opcode_modifier.islockable
4431           || i.mem_operands == 0
4432           || (i.tm.base_opcode != 0x86
4433               && !(i.flags[i.operands - 1] & Operand_Mem))))
4434     {
4435       as_bad (_("expecting lockable instruction after `lock'"));
4436       return;
4437     }
4438
4439   /* Check for data size prefix on VEX/XOP/EVEX encoded insns.  */
4440   if (i.prefix[DATA_PREFIX] && is_any_vex_encoding (&i.tm))
4441     {
4442       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
4443       return;
4444     }
4445
4446   /* Check if HLE prefix is OK.  */
4447   if (i.hle_prefix && !check_hle ())
4448     return;
4449
4450   /* Check BND prefix.  */
4451   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
4452     as_bad (_("expecting valid branch instruction after `bnd'"));
4453
4454   /* Check NOTRACK prefix.  */
4455   if (i.notrack_prefix && !i.tm.opcode_modifier.notrackprefixok)
4456     as_bad (_("expecting indirect branch instruction after `notrack'"));
4457
4458   if (i.tm.cpu_flags.bitfield.cpumpx)
4459     {
4460       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4461         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
4462       else if (flag_code != CODE_16BIT
4463                ? i.prefix[ADDR_PREFIX]
4464                : i.mem_operands && !i.prefix[ADDR_PREFIX])
4465         as_bad (_("16-bit address isn't allowed in MPX instructions"));
4466     }
4467
4468   /* Insert BND prefix.  */
4469   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
4470     {
4471       if (!i.prefix[BND_PREFIX])
4472         add_prefix (BND_PREFIX_OPCODE);
4473       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
4474         {
4475           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
4476           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
4477         }
4478     }
4479
4480   /* Check string instruction segment overrides.  */
4481   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
4482     {
4483       gas_assert (i.mem_operands);
4484       if (!check_string ())
4485         return;
4486       i.disp_operands = 0;
4487     }
4488
4489   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
4490     optimize_encoding ();
4491
4492   if (!process_suffix ())
4493     return;
4494
4495   /* Update operand types.  */
4496   for (j = 0; j < i.operands; j++)
4497     i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
4498
4499   /* Make still unresolved immediate matches conform to size of immediate
4500      given in i.suffix.  */
4501   if (!finalize_imm ())
4502     return;
4503
4504   if (i.types[0].bitfield.imm1)
4505     i.imm_operands = 0; /* kludge for shift insns.  */
4506
4507   /* We only need to check those implicit registers for instructions
4508      with 3 operands or less.  */
4509   if (i.operands <= 3)
4510     for (j = 0; j < i.operands; j++)
4511       if (i.types[j].bitfield.instance != InstanceNone
4512           && !i.types[j].bitfield.xmmword)
4513         i.reg_operands--;
4514
4515   /* ImmExt should be processed after SSE2AVX.  */
4516   if (!i.tm.opcode_modifier.sse2avx
4517       && i.tm.opcode_modifier.immext)
4518     process_immext ();
4519
4520   /* For insns with operands there are more diddles to do to the opcode.  */
4521   if (i.operands)
4522     {
4523       if (!process_operands ())
4524         return;
4525     }
4526   else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
4527     {
4528       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
4529       as_warn (_("translating to `%sp'"), i.tm.name);
4530     }
4531
4532   if (is_any_vex_encoding (&i.tm))
4533     {
4534       if (!cpu_arch_flags.bitfield.cpui286)
4535         {
4536           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
4537                   i.tm.name);
4538           return;
4539         }
4540
4541       if (i.tm.opcode_modifier.vex)
4542         build_vex_prefix (t);
4543       else
4544         build_evex_prefix ();
4545     }
4546
4547   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
4548      instructions may define INT_OPCODE as well, so avoid this corner
4549      case for those instructions that use MODRM.  */
4550   if (i.tm.base_opcode == INT_OPCODE
4551       && !i.tm.opcode_modifier.modrm
4552       && i.op[0].imms->X_add_number == 3)
4553     {
4554       i.tm.base_opcode = INT3_OPCODE;
4555       i.imm_operands = 0;
4556     }
4557
4558   if ((i.tm.opcode_modifier.jump == JUMP
4559        || i.tm.opcode_modifier.jump == JUMP_BYTE
4560        || i.tm.opcode_modifier.jump == JUMP_DWORD)
4561       && i.op[0].disps->X_op == O_constant)
4562     {
4563       /* Convert "jmp constant" (and "call constant") to a jump (call) to
4564          the absolute address given by the constant.  Since ix86 jumps and
4565          calls are pc relative, we need to generate a reloc.  */
4566       i.op[0].disps->X_add_symbol = &abs_symbol;
4567       i.op[0].disps->X_op = O_symbol;
4568     }
4569
4570   /* For 8 bit registers we need an empty rex prefix.  Also if the
4571      instruction already has a prefix, we need to convert old
4572      registers to new ones.  */
4573
4574   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
4575        && (i.op[0].regs->reg_flags & RegRex64) != 0)
4576       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
4577           && (i.op[1].regs->reg_flags & RegRex64) != 0)
4578       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
4579            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
4580           && i.rex != 0))
4581     {
4582       int x;
4583
4584       i.rex |= REX_OPCODE;
4585       for (x = 0; x < 2; x++)
4586         {
4587           /* Look for 8 bit operand that uses old registers.  */
4588           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4589               && (i.op[x].regs->reg_flags & RegRex64) == 0)
4590             {
4591               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4592               /* In case it is "hi" register, give up.  */
4593               if (i.op[x].regs->reg_num > 3)
4594                 as_bad (_("can't encode register '%s%s' in an "
4595                           "instruction requiring REX prefix."),
4596                         register_prefix, i.op[x].regs->reg_name);
4597
4598               /* Otherwise it is equivalent to the extended register.
4599                  Since the encoding doesn't change this is merely
4600                  cosmetic cleanup for debug output.  */
4601
4602               i.op[x].regs = i.op[x].regs + 8;
4603             }
4604         }
4605     }
4606
4607   if (i.rex == 0 && i.rex_encoding)
4608     {
4609       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
4610          that uses legacy register.  If it is "hi" register, don't add
4611          the REX_OPCODE byte.  */
4612       int x;
4613       for (x = 0; x < 2; x++)
4614         if (i.types[x].bitfield.class == Reg
4615             && i.types[x].bitfield.byte
4616             && (i.op[x].regs->reg_flags & RegRex64) == 0
4617             && i.op[x].regs->reg_num > 3)
4618           {
4619             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4620             i.rex_encoding = FALSE;
4621             break;
4622           }
4623
4624       if (i.rex_encoding)
4625         i.rex = REX_OPCODE;
4626     }
4627
4628   if (i.rex != 0)
4629     add_prefix (REX_OPCODE | i.rex);
4630
4631   /* We are ready to output the insn.  */
4632   output_insn ();
4633
4634   last_insn.seg = now_seg;
4635
4636   if (i.tm.opcode_modifier.isprefix)
4637     {
4638       last_insn.kind = last_insn_prefix;
4639       last_insn.name = i.tm.name;
4640       last_insn.file = as_where (&last_insn.line);
4641     }
4642   else
4643     last_insn.kind = last_insn_other;
4644 }
4645
4646 static char *
4647 parse_insn (char *line, char *mnemonic)
4648 {
4649   char *l = line;
4650   char *token_start = l;
4651   char *mnem_p;
4652   int supported;
4653   const insn_template *t;
4654   char *dot_p = NULL;
4655
4656   while (1)
4657     {
4658       mnem_p = mnemonic;
4659       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
4660         {
4661           if (*mnem_p == '.')
4662             dot_p = mnem_p;
4663           mnem_p++;
4664           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
4665             {
4666               as_bad (_("no such instruction: `%s'"), token_start);
4667               return NULL;
4668             }
4669           l++;
4670         }
4671       if (!is_space_char (*l)
4672           && *l != END_OF_INSN
4673           && (intel_syntax
4674               || (*l != PREFIX_SEPARATOR
4675                   && *l != ',')))
4676         {
4677           as_bad (_("invalid character %s in mnemonic"),
4678                   output_invalid (*l));
4679           return NULL;
4680         }
4681       if (token_start == l)
4682         {
4683           if (!intel_syntax && *l == PREFIX_SEPARATOR)
4684             as_bad (_("expecting prefix; got nothing"));
4685           else
4686             as_bad (_("expecting mnemonic; got nothing"));
4687           return NULL;
4688         }
4689
4690       /* Look up instruction (or prefix) via hash table.  */
4691       current_templates = (const templates *) hash_find (op_hash, mnemonic);
4692
4693       if (*l != END_OF_INSN
4694           && (!is_space_char (*l) || l[1] != END_OF_INSN)
4695           && current_templates
4696           && current_templates->start->opcode_modifier.isprefix)
4697         {
4698           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
4699             {
4700               as_bad ((flag_code != CODE_64BIT
4701                        ? _("`%s' is only supported in 64-bit mode")
4702                        : _("`%s' is not supported in 64-bit mode")),
4703                       current_templates->start->name);
4704               return NULL;
4705             }
4706           /* If we are in 16-bit mode, do not allow addr16 or data16.
4707              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
4708           if ((current_templates->start->opcode_modifier.size == SIZE16
4709                || current_templates->start->opcode_modifier.size == SIZE32)
4710               && flag_code != CODE_64BIT
4711               && ((current_templates->start->opcode_modifier.size == SIZE32)
4712                   ^ (flag_code == CODE_16BIT)))
4713             {
4714               as_bad (_("redundant %s prefix"),
4715                       current_templates->start->name);
4716               return NULL;
4717             }
4718           if (current_templates->start->opcode_length == 0)
4719             {
4720               /* Handle pseudo prefixes.  */
4721               switch (current_templates->start->base_opcode)
4722                 {
4723                 case 0x0:
4724                   /* {disp8} */
4725                   i.disp_encoding = disp_encoding_8bit;
4726                   break;
4727                 case 0x1:
4728                   /* {disp32} */
4729                   i.disp_encoding = disp_encoding_32bit;
4730                   break;
4731                 case 0x2:
4732                   /* {load} */
4733                   i.dir_encoding = dir_encoding_load;
4734                   break;
4735                 case 0x3:
4736                   /* {store} */
4737                   i.dir_encoding = dir_encoding_store;
4738                   break;
4739                 case 0x4:
4740                   /* {vex} */
4741                   i.vec_encoding = vex_encoding_vex;
4742                   break;
4743                 case 0x5:
4744                   /* {vex3} */
4745                   i.vec_encoding = vex_encoding_vex3;
4746                   break;
4747                 case 0x6:
4748                   /* {evex} */
4749                   i.vec_encoding = vex_encoding_evex;
4750                   break;
4751                 case 0x7:
4752                   /* {rex} */
4753                   i.rex_encoding = TRUE;
4754                   break;
4755                 case 0x8:
4756                   /* {nooptimize} */
4757                   i.no_optimize = TRUE;
4758                   break;
4759                 default:
4760                   abort ();
4761                 }
4762             }
4763           else
4764             {
4765               /* Add prefix, checking for repeated prefixes.  */
4766               switch (add_prefix (current_templates->start->base_opcode))
4767                 {
4768                 case PREFIX_EXIST:
4769                   return NULL;
4770                 case PREFIX_DS:
4771                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
4772                     i.notrack_prefix = current_templates->start->name;
4773                   break;
4774                 case PREFIX_REP:
4775                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
4776                     i.hle_prefix = current_templates->start->name;
4777                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
4778                     i.bnd_prefix = current_templates->start->name;
4779                   else
4780                     i.rep_prefix = current_templates->start->name;
4781                   break;
4782                 default:
4783                   break;
4784                 }
4785             }
4786           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
4787           token_start = ++l;
4788         }
4789       else
4790         break;
4791     }
4792
4793   if (!current_templates)
4794     {
4795       /* Deprecated functionality (new code should use pseudo-prefixes instead):
4796          Check if we should swap operand or force 32bit displacement in
4797          encoding.  */
4798       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
4799         i.dir_encoding = dir_encoding_swap;
4800       else if (mnem_p - 3 == dot_p
4801                && dot_p[1] == 'd'
4802                && dot_p[2] == '8')
4803         i.disp_encoding = disp_encoding_8bit;
4804       else if (mnem_p - 4 == dot_p
4805                && dot_p[1] == 'd'
4806                && dot_p[2] == '3'
4807                && dot_p[3] == '2')
4808         i.disp_encoding = disp_encoding_32bit;
4809       else
4810         goto check_suffix;
4811       mnem_p = dot_p;
4812       *dot_p = '\0';
4813       current_templates = (const templates *) hash_find (op_hash, mnemonic);
4814     }
4815
4816   if (!current_templates)
4817     {
4818     check_suffix:
4819       if (mnem_p > mnemonic)
4820         {
4821           /* See if we can get a match by trimming off a suffix.  */
4822           switch (mnem_p[-1])
4823             {
4824             case WORD_MNEM_SUFFIX:
4825               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
4826                 i.suffix = SHORT_MNEM_SUFFIX;
4827               else
4828                 /* Fall through.  */
4829               case BYTE_MNEM_SUFFIX:
4830               case QWORD_MNEM_SUFFIX:
4831                 i.suffix = mnem_p[-1];
4832               mnem_p[-1] = '\0';
4833               current_templates = (const templates *) hash_find (op_hash,
4834                                                                  mnemonic);
4835               break;
4836             case SHORT_MNEM_SUFFIX:
4837             case LONG_MNEM_SUFFIX:
4838               if (!intel_syntax)
4839                 {
4840                   i.suffix = mnem_p[-1];
4841                   mnem_p[-1] = '\0';
4842                   current_templates = (const templates *) hash_find (op_hash,
4843                                                                      mnemonic);
4844                 }
4845               break;
4846
4847               /* Intel Syntax.  */
4848             case 'd':
4849               if (intel_syntax)
4850                 {
4851                   if (intel_float_operand (mnemonic) == 1)
4852                     i.suffix = SHORT_MNEM_SUFFIX;
4853                   else
4854                     i.suffix = LONG_MNEM_SUFFIX;
4855                   mnem_p[-1] = '\0';
4856                   current_templates = (const templates *) hash_find (op_hash,
4857                                                                      mnemonic);
4858                 }
4859               break;
4860             }
4861         }
4862
4863       if (!current_templates)
4864         {
4865           as_bad (_("no such instruction: `%s'"), token_start);
4866           return NULL;
4867         }
4868     }
4869
4870   if (current_templates->start->opcode_modifier.jump == JUMP
4871       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
4872     {
4873       /* Check for a branch hint.  We allow ",pt" and ",pn" for
4874          predict taken and predict not taken respectively.
4875          I'm not sure that branch hints actually do anything on loop
4876          and jcxz insns (JumpByte) for current Pentium4 chips.  They
4877          may work in the future and it doesn't hurt to accept them
4878          now.  */
4879       if (l[0] == ',' && l[1] == 'p')
4880         {
4881           if (l[2] == 't')
4882             {
4883               if (!add_prefix (DS_PREFIX_OPCODE))
4884                 return NULL;
4885               l += 3;
4886             }
4887           else if (l[2] == 'n')
4888             {
4889               if (!add_prefix (CS_PREFIX_OPCODE))
4890                 return NULL;
4891               l += 3;
4892             }
4893         }
4894     }
4895   /* Any other comma loses.  */
4896   if (*l == ',')
4897     {
4898       as_bad (_("invalid character %s in mnemonic"),
4899               output_invalid (*l));
4900       return NULL;
4901     }
4902
4903   /* Check if instruction is supported on specified architecture.  */
4904   supported = 0;
4905   for (t = current_templates->start; t < current_templates->end; ++t)
4906     {
4907       supported |= cpu_flags_match (t);
4908       if (supported == CPU_FLAGS_PERFECT_MATCH)
4909         {
4910           if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
4911             as_warn (_("use .code16 to ensure correct addressing mode"));
4912
4913           return l;
4914         }
4915     }
4916
4917   if (!(supported & CPU_FLAGS_64BIT_MATCH))
4918     as_bad (flag_code == CODE_64BIT
4919             ? _("`%s' is not supported in 64-bit mode")
4920             : _("`%s' is only supported in 64-bit mode"),
4921             current_templates->start->name);
4922   else
4923     as_bad (_("`%s' is not supported on `%s%s'"),
4924             current_templates->start->name,
4925             cpu_arch_name ? cpu_arch_name : default_arch,
4926             cpu_sub_arch_name ? cpu_sub_arch_name : "");
4927
4928   return NULL;
4929 }
4930
4931 static char *
4932 parse_operands (char *l, const char *mnemonic)
4933 {
4934   char *token_start;
4935
4936   /* 1 if operand is pending after ','.  */
4937   unsigned int expecting_operand = 0;
4938
4939   /* Non-zero if operand parens not balanced.  */
4940   unsigned int paren_not_balanced;
4941
4942   while (*l != END_OF_INSN)
4943     {
4944       /* Skip optional white space before operand.  */
4945       if (is_space_char (*l))
4946         ++l;
4947       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
4948         {
4949           as_bad (_("invalid character %s before operand %d"),
4950                   output_invalid (*l),
4951                   i.operands + 1);
4952           return NULL;
4953         }
4954       token_start = l;  /* After white space.  */
4955       paren_not_balanced = 0;
4956       while (paren_not_balanced || *l != ',')
4957         {
4958           if (*l == END_OF_INSN)
4959             {
4960               if (paren_not_balanced)
4961                 {
4962                   if (!intel_syntax)
4963                     as_bad (_("unbalanced parenthesis in operand %d."),
4964                             i.operands + 1);
4965                   else
4966                     as_bad (_("unbalanced brackets in operand %d."),
4967                             i.operands + 1);
4968                   return NULL;
4969                 }
4970               else
4971                 break;  /* we are done */
4972             }
4973           else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
4974             {
4975               as_bad (_("invalid character %s in operand %d"),
4976                       output_invalid (*l),
4977                       i.operands + 1);
4978               return NULL;
4979             }
4980           if (!intel_syntax)
4981             {
4982               if (*l == '(')
4983                 ++paren_not_balanced;
4984               if (*l == ')')
4985                 --paren_not_balanced;
4986             }
4987           else
4988             {
4989               if (*l == '[')
4990                 ++paren_not_balanced;
4991               if (*l == ']')
4992                 --paren_not_balanced;
4993             }
4994           l++;
4995         }
4996       if (l != token_start)
4997         {                       /* Yes, we've read in another operand.  */
4998           unsigned int operand_ok;
4999           this_operand = i.operands++;
5000           if (i.operands > MAX_OPERANDS)
5001             {
5002               as_bad (_("spurious operands; (%d operands/instruction max)"),
5003                       MAX_OPERANDS);
5004               return NULL;
5005             }
5006           i.types[this_operand].bitfield.unspecified = 1;
5007           /* Now parse operand adding info to 'i' as we go along.  */
5008           END_STRING_AND_SAVE (l);
5009
5010           if (i.mem_operands > 1)
5011             {
5012               as_bad (_("too many memory references for `%s'"),
5013                       mnemonic);
5014               return 0;
5015             }
5016
5017           if (intel_syntax)
5018             operand_ok =
5019               i386_intel_operand (token_start,
5020                                   intel_float_operand (mnemonic));
5021           else
5022             operand_ok = i386_att_operand (token_start);
5023
5024           RESTORE_END_STRING (l);
5025           if (!operand_ok)
5026             return NULL;
5027         }
5028       else
5029         {
5030           if (expecting_operand)
5031             {
5032             expecting_operand_after_comma:
5033               as_bad (_("expecting operand after ','; got nothing"));
5034               return NULL;
5035             }
5036           if (*l == ',')
5037             {
5038               as_bad (_("expecting operand before ','; got nothing"));
5039               return NULL;
5040             }
5041         }
5042
5043       /* Now *l must be either ',' or END_OF_INSN.  */
5044       if (*l == ',')
5045         {
5046           if (*++l == END_OF_INSN)
5047             {
5048               /* Just skip it, if it's \n complain.  */
5049               goto expecting_operand_after_comma;
5050             }
5051           expecting_operand = 1;
5052         }
5053     }
5054   return l;
5055 }
5056
5057 static void
5058 swap_2_operands (int xchg1, int xchg2)
5059 {
5060   union i386_op temp_op;
5061   i386_operand_type temp_type;
5062   unsigned int temp_flags;
5063   enum bfd_reloc_code_real temp_reloc;
5064
5065   temp_type = i.types[xchg2];
5066   i.types[xchg2] = i.types[xchg1];
5067   i.types[xchg1] = temp_type;
5068
5069   temp_flags = i.flags[xchg2];
5070   i.flags[xchg2] = i.flags[xchg1];
5071   i.flags[xchg1] = temp_flags;
5072
5073   temp_op = i.op[xchg2];
5074   i.op[xchg2] = i.op[xchg1];
5075   i.op[xchg1] = temp_op;
5076
5077   temp_reloc = i.reloc[xchg2];
5078   i.reloc[xchg2] = i.reloc[xchg1];
5079   i.reloc[xchg1] = temp_reloc;
5080
5081   if (i.mask)
5082     {
5083       if (i.mask->operand == xchg1)
5084         i.mask->operand = xchg2;
5085       else if (i.mask->operand == xchg2)
5086         i.mask->operand = xchg1;
5087     }
5088   if (i.broadcast)
5089     {
5090       if (i.broadcast->operand == xchg1)
5091         i.broadcast->operand = xchg2;
5092       else if (i.broadcast->operand == xchg2)
5093         i.broadcast->operand = xchg1;
5094     }
5095   if (i.rounding)
5096     {
5097       if (i.rounding->operand == xchg1)
5098         i.rounding->operand = xchg2;
5099       else if (i.rounding->operand == xchg2)
5100         i.rounding->operand = xchg1;
5101     }
5102 }
5103
5104 static void
5105 swap_operands (void)
5106 {
5107   switch (i.operands)
5108     {
5109     case 5:
5110     case 4:
5111       swap_2_operands (1, i.operands - 2);
5112       /* Fall through.  */
5113     case 3:
5114     case 2:
5115       swap_2_operands (0, i.operands - 1);
5116       break;
5117     default:
5118       abort ();
5119     }
5120
5121   if (i.mem_operands == 2)
5122     {
5123       const seg_entry *temp_seg;
5124       temp_seg = i.seg[0];
5125       i.seg[0] = i.seg[1];
5126       i.seg[1] = temp_seg;
5127     }
5128 }
5129
5130 /* Try to ensure constant immediates are represented in the smallest
5131    opcode possible.  */
5132 static void
5133 optimize_imm (void)
5134 {
5135   char guess_suffix = 0;
5136   int op;
5137
5138   if (i.suffix)
5139     guess_suffix = i.suffix;
5140   else if (i.reg_operands)
5141     {
5142       /* Figure out a suffix from the last register operand specified.
5143          We can't do this properly yet, i.e. excluding special register
5144          instances, but the following works for instructions with
5145          immediates.  In any case, we can't set i.suffix yet.  */
5146       for (op = i.operands; --op >= 0;)
5147         if (i.types[op].bitfield.class != Reg)
5148           continue;
5149         else if (i.types[op].bitfield.byte)
5150           {
5151             guess_suffix = BYTE_MNEM_SUFFIX;
5152             break;
5153           }
5154         else if (i.types[op].bitfield.word)
5155           {
5156             guess_suffix = WORD_MNEM_SUFFIX;
5157             break;
5158           }
5159         else if (i.types[op].bitfield.dword)
5160           {
5161             guess_suffix = LONG_MNEM_SUFFIX;
5162             break;
5163           }
5164         else if (i.types[op].bitfield.qword)
5165           {
5166             guess_suffix = QWORD_MNEM_SUFFIX;
5167             break;
5168           }
5169     }
5170   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5171     guess_suffix = WORD_MNEM_SUFFIX;
5172
5173   for (op = i.operands; --op >= 0;)
5174     if (operand_type_check (i.types[op], imm))
5175       {
5176         switch (i.op[op].imms->X_op)
5177           {
5178           case O_constant:
5179             /* If a suffix is given, this operand may be shortened.  */
5180             switch (guess_suffix)
5181               {
5182               case LONG_MNEM_SUFFIX:
5183                 i.types[op].bitfield.imm32 = 1;
5184                 i.types[op].bitfield.imm64 = 1;
5185                 break;
5186               case WORD_MNEM_SUFFIX:
5187                 i.types[op].bitfield.imm16 = 1;
5188                 i.types[op].bitfield.imm32 = 1;
5189                 i.types[op].bitfield.imm32s = 1;
5190                 i.types[op].bitfield.imm64 = 1;
5191                 break;
5192               case BYTE_MNEM_SUFFIX:
5193                 i.types[op].bitfield.imm8 = 1;
5194                 i.types[op].bitfield.imm8s = 1;
5195                 i.types[op].bitfield.imm16 = 1;
5196                 i.types[op].bitfield.imm32 = 1;
5197                 i.types[op].bitfield.imm32s = 1;
5198                 i.types[op].bitfield.imm64 = 1;
5199                 break;
5200               }
5201
5202             /* If this operand is at most 16 bits, convert it
5203                to a signed 16 bit number before trying to see
5204                whether it will fit in an even smaller size.
5205                This allows a 16-bit operand such as $0xffe0 to
5206                be recognised as within Imm8S range.  */
5207             if ((i.types[op].bitfield.imm16)
5208                 && (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
5209               {
5210                 i.op[op].imms->X_add_number =
5211                   (((i.op[op].imms->X_add_number & 0xffff) ^ 0x8000) - 0x8000);
5212               }
5213 #ifdef BFD64
5214             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5215             if ((i.types[op].bitfield.imm32)
5216                 && ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
5217                     == 0))
5218               {
5219                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5220                                                 ^ ((offsetT) 1 << 31))
5221                                                - ((offsetT) 1 << 31));
5222               }
5223 #endif
5224             i.types[op]
5225               = operand_type_or (i.types[op],
5226                                  smallest_imm_type (i.op[op].imms->X_add_number));
5227
5228             /* We must avoid matching of Imm32 templates when 64bit
5229                only immediate is available.  */
5230             if (guess_suffix == QWORD_MNEM_SUFFIX)
5231               i.types[op].bitfield.imm32 = 0;
5232             break;
5233
5234           case O_absent:
5235           case O_register:
5236             abort ();
5237
5238             /* Symbols and expressions.  */
5239           default:
5240             /* Convert symbolic operand to proper sizes for matching, but don't
5241                prevent matching a set of insns that only supports sizes other
5242                than those matching the insn suffix.  */
5243             {
5244               i386_operand_type mask, allowed;
5245               const insn_template *t;
5246
5247               operand_type_set (&mask, 0);
5248               operand_type_set (&allowed, 0);
5249
5250               for (t = current_templates->start;
5251                    t < current_templates->end;
5252                    ++t)
5253                 {
5254                   allowed = operand_type_or (allowed, t->operand_types[op]);
5255                   allowed = operand_type_and (allowed, anyimm);
5256                 }
5257               switch (guess_suffix)
5258                 {
5259                 case QWORD_MNEM_SUFFIX:
5260                   mask.bitfield.imm64 = 1;
5261                   mask.bitfield.imm32s = 1;
5262                   break;
5263                 case LONG_MNEM_SUFFIX:
5264                   mask.bitfield.imm32 = 1;
5265                   break;
5266                 case WORD_MNEM_SUFFIX:
5267                   mask.bitfield.imm16 = 1;
5268                   break;
5269                 case BYTE_MNEM_SUFFIX:
5270                   mask.bitfield.imm8 = 1;
5271                   break;
5272                 default:
5273                   break;
5274                 }
5275               allowed = operand_type_and (mask, allowed);
5276               if (!operand_type_all_zero (&allowed))
5277                 i.types[op] = operand_type_and (i.types[op], mask);
5278             }
5279             break;
5280           }
5281       }
5282 }
5283
5284 /* Try to use the smallest displacement type too.  */
5285 static void
5286 optimize_disp (void)
5287 {
5288   int op;
5289
5290   for (op = i.operands; --op >= 0;)
5291     if (operand_type_check (i.types[op], disp))
5292       {
5293         if (i.op[op].disps->X_op == O_constant)
5294           {
5295             offsetT op_disp = i.op[op].disps->X_add_number;
5296
5297             if (i.types[op].bitfield.disp16
5298                 && (op_disp & ~(offsetT) 0xffff) == 0)
5299               {
5300                 /* If this operand is at most 16 bits, convert
5301                    to a signed 16 bit number and don't use 64bit
5302                    displacement.  */
5303                 op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
5304                 i.types[op].bitfield.disp64 = 0;
5305               }
5306 #ifdef BFD64
5307             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5308             if (i.types[op].bitfield.disp32
5309                 && (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
5310               {
5311                 /* If this operand is at most 32 bits, convert
5312                    to a signed 32 bit number and don't use 64bit
5313                    displacement.  */
5314                 op_disp &= (((offsetT) 2 << 31) - 1);
5315                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5316                 i.types[op].bitfield.disp64 = 0;
5317               }
5318 #endif
5319             if (!op_disp && i.types[op].bitfield.baseindex)
5320               {
5321                 i.types[op].bitfield.disp8 = 0;
5322                 i.types[op].bitfield.disp16 = 0;
5323                 i.types[op].bitfield.disp32 = 0;
5324                 i.types[op].bitfield.disp32s = 0;
5325                 i.types[op].bitfield.disp64 = 0;
5326                 i.op[op].disps = 0;
5327                 i.disp_operands--;
5328               }
5329             else if (flag_code == CODE_64BIT)
5330               {
5331                 if (fits_in_signed_long (op_disp))
5332                   {
5333                     i.types[op].bitfield.disp64 = 0;
5334                     i.types[op].bitfield.disp32s = 1;
5335                   }
5336                 if (i.prefix[ADDR_PREFIX]
5337                     && fits_in_unsigned_long (op_disp))
5338                   i.types[op].bitfield.disp32 = 1;
5339               }
5340             if ((i.types[op].bitfield.disp32
5341                  || i.types[op].bitfield.disp32s
5342                  || i.types[op].bitfield.disp16)
5343                 && fits_in_disp8 (op_disp))
5344               i.types[op].bitfield.disp8 = 1;
5345           }
5346         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5347                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5348           {
5349             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5350                          i.op[op].disps, 0, i.reloc[op]);
5351             i.types[op].bitfield.disp8 = 0;
5352             i.types[op].bitfield.disp16 = 0;
5353             i.types[op].bitfield.disp32 = 0;
5354             i.types[op].bitfield.disp32s = 0;
5355             i.types[op].bitfield.disp64 = 0;
5356           }
5357         else
5358           /* We only support 64bit displacement on constants.  */
5359           i.types[op].bitfield.disp64 = 0;
5360       }
5361 }
5362
5363 /* Return 1 if there is a match in broadcast bytes between operand
5364    GIVEN and instruction template T.   */
5365
5366 static INLINE int
5367 match_broadcast_size (const insn_template *t, unsigned int given)
5368 {
5369   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5370            && i.types[given].bitfield.byte)
5371           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5372               && i.types[given].bitfield.word)
5373           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5374               && i.types[given].bitfield.dword)
5375           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5376               && i.types[given].bitfield.qword));
5377 }
5378
5379 /* Check if operands are valid for the instruction.  */
5380
5381 static int
5382 check_VecOperands (const insn_template *t)
5383 {
5384   unsigned int op;
5385   i386_cpu_flags cpu;
5386
5387   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
5388      any one operand are implicity requiring AVX512VL support if the actual
5389      operand size is YMMword or XMMword.  Since this function runs after
5390      template matching, there's no need to check for YMMword/XMMword in
5391      the template.  */
5392   cpu = cpu_flags_and (t->cpu_flags, avx512);
5393   if (!cpu_flags_all_zero (&cpu)
5394       && !t->cpu_flags.bitfield.cpuavx512vl
5395       && !cpu_arch_flags.bitfield.cpuavx512vl)
5396     {
5397       for (op = 0; op < t->operands; ++op)
5398         {
5399           if (t->operand_types[op].bitfield.zmmword
5400               && (i.types[op].bitfield.ymmword
5401                   || i.types[op].bitfield.xmmword))
5402             {
5403               i.error = unsupported;
5404               return 1;
5405             }
5406         }
5407     }
5408
5409   /* Without VSIB byte, we can't have a vector register for index.  */
5410   if (!t->opcode_modifier.vecsib
5411       && i.index_reg
5412       && (i.index_reg->reg_type.bitfield.xmmword
5413           || i.index_reg->reg_type.bitfield.ymmword
5414           || i.index_reg->reg_type.bitfield.zmmword))
5415     {
5416       i.error = unsupported_vector_index_register;
5417       return 1;
5418     }
5419
5420   /* Check if default mask is allowed.  */
5421   if (t->opcode_modifier.nodefmask
5422       && (!i.mask || i.mask->mask->reg_num == 0))
5423     {
5424       i.error = no_default_mask;
5425       return 1;
5426     }
5427
5428   /* For VSIB byte, we need a vector register for index, and all vector
5429      registers must be distinct.  */
5430   if (t->opcode_modifier.vecsib)
5431     {
5432       if (!i.index_reg
5433           || !((t->opcode_modifier.vecsib == VecSIB128
5434                 && i.index_reg->reg_type.bitfield.xmmword)
5435                || (t->opcode_modifier.vecsib == VecSIB256
5436                    && i.index_reg->reg_type.bitfield.ymmword)
5437                || (t->opcode_modifier.vecsib == VecSIB512
5438                    && i.index_reg->reg_type.bitfield.zmmword)))
5439       {
5440         i.error = invalid_vsib_address;
5441         return 1;
5442       }
5443
5444       gas_assert (i.reg_operands == 2 || i.mask);
5445       if (i.reg_operands == 2 && !i.mask)
5446         {
5447           gas_assert (i.types[0].bitfield.class == RegSIMD);
5448           gas_assert (i.types[0].bitfield.xmmword
5449                       || i.types[0].bitfield.ymmword);
5450           gas_assert (i.types[2].bitfield.class == RegSIMD);
5451           gas_assert (i.types[2].bitfield.xmmword
5452                       || i.types[2].bitfield.ymmword);
5453           if (operand_check == check_none)
5454             return 0;
5455           if (register_number (i.op[0].regs)
5456               != register_number (i.index_reg)
5457               && register_number (i.op[2].regs)
5458                  != register_number (i.index_reg)
5459               && register_number (i.op[0].regs)
5460                  != register_number (i.op[2].regs))
5461             return 0;
5462           if (operand_check == check_error)
5463             {
5464               i.error = invalid_vector_register_set;
5465               return 1;
5466             }
5467           as_warn (_("mask, index, and destination registers should be distinct"));
5468         }
5469       else if (i.reg_operands == 1 && i.mask)
5470         {
5471           if (i.types[1].bitfield.class == RegSIMD
5472               && (i.types[1].bitfield.xmmword
5473                   || i.types[1].bitfield.ymmword
5474                   || i.types[1].bitfield.zmmword)
5475               && (register_number (i.op[1].regs)
5476                   == register_number (i.index_reg)))
5477             {
5478               if (operand_check == check_error)
5479                 {
5480                   i.error = invalid_vector_register_set;
5481                   return 1;
5482                 }
5483               if (operand_check != check_none)
5484                 as_warn (_("index and destination registers should be distinct"));
5485             }
5486         }
5487     }
5488
5489   /* Check if broadcast is supported by the instruction and is applied
5490      to the memory operand.  */
5491   if (i.broadcast)
5492     {
5493       i386_operand_type type, overlap;
5494
5495       /* Check if specified broadcast is supported in this instruction,
5496          and its broadcast bytes match the memory operand.  */
5497       op = i.broadcast->operand;
5498       if (!t->opcode_modifier.broadcast
5499           || !(i.flags[op] & Operand_Mem)
5500           || (!i.types[op].bitfield.unspecified
5501               && !match_broadcast_size (t, op)))
5502         {
5503         bad_broadcast:
5504           i.error = unsupported_broadcast;
5505           return 1;
5506         }
5507
5508       i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1))
5509                             * i.broadcast->type);
5510       operand_type_set (&type, 0);
5511       switch (i.broadcast->bytes)
5512         {
5513         case 2:
5514           type.bitfield.word = 1;
5515           break;
5516         case 4:
5517           type.bitfield.dword = 1;
5518           break;
5519         case 8:
5520           type.bitfield.qword = 1;
5521           break;
5522         case 16:
5523           type.bitfield.xmmword = 1;
5524           break;
5525         case 32:
5526           type.bitfield.ymmword = 1;
5527           break;
5528         case 64:
5529           type.bitfield.zmmword = 1;
5530           break;
5531         default:
5532           goto bad_broadcast;
5533         }
5534
5535       overlap = operand_type_and (type, t->operand_types[op]);
5536       if (operand_type_all_zero (&overlap))
5537           goto bad_broadcast;
5538
5539       if (t->opcode_modifier.checkregsize)
5540         {
5541           unsigned int j;
5542
5543           type.bitfield.baseindex = 1;
5544           for (j = 0; j < i.operands; ++j)
5545             {
5546               if (j != op
5547                   && !operand_type_register_match(i.types[j],
5548                                                   t->operand_types[j],
5549                                                   type,
5550                                                   t->operand_types[op]))
5551                 goto bad_broadcast;
5552             }
5553         }
5554     }
5555   /* If broadcast is supported in this instruction, we need to check if
5556      operand of one-element size isn't specified without broadcast.  */
5557   else if (t->opcode_modifier.broadcast && i.mem_operands)
5558     {
5559       /* Find memory operand.  */
5560       for (op = 0; op < i.operands; op++)
5561         if (i.flags[op] & Operand_Mem)
5562           break;
5563       gas_assert (op < i.operands);
5564       /* Check size of the memory operand.  */
5565       if (match_broadcast_size (t, op))
5566         {
5567           i.error = broadcast_needed;
5568           return 1;
5569         }
5570     }
5571   else
5572     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
5573
5574   /* Check if requested masking is supported.  */
5575   if (i.mask)
5576     {
5577       switch (t->opcode_modifier.masking)
5578         {
5579         case BOTH_MASKING:
5580           break;
5581         case MERGING_MASKING:
5582           if (i.mask->zeroing)
5583             {
5584         case 0:
5585               i.error = unsupported_masking;
5586               return 1;
5587             }
5588           break;
5589         case DYNAMIC_MASKING:
5590           /* Memory destinations allow only merging masking.  */
5591           if (i.mask->zeroing && i.mem_operands)
5592             {
5593               /* Find memory operand.  */
5594               for (op = 0; op < i.operands; op++)
5595                 if (i.flags[op] & Operand_Mem)
5596                   break;
5597               gas_assert (op < i.operands);
5598               if (op == i.operands - 1)
5599                 {
5600                   i.error = unsupported_masking;
5601                   return 1;
5602                 }
5603             }
5604           break;
5605         default:
5606           abort ();
5607         }
5608     }
5609
5610   /* Check if masking is applied to dest operand.  */
5611   if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
5612     {
5613       i.error = mask_not_on_destination;
5614       return 1;
5615     }
5616
5617   /* Check RC/SAE.  */
5618   if (i.rounding)
5619     {
5620       if (!t->opcode_modifier.sae
5621           || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
5622         {
5623           i.error = unsupported_rc_sae;
5624           return 1;
5625         }
5626       /* If the instruction has several immediate operands and one of
5627          them is rounding, the rounding operand should be the last
5628          immediate operand.  */
5629       if (i.imm_operands > 1
5630           && i.rounding->operand != (int) (i.imm_operands - 1))
5631         {
5632           i.error = rc_sae_operand_not_last_imm;
5633           return 1;
5634         }
5635     }
5636
5637   /* Check vector Disp8 operand.  */
5638   if (t->opcode_modifier.disp8memshift
5639       && i.disp_encoding != disp_encoding_32bit)
5640     {
5641       if (i.broadcast)
5642         i.memshift = t->opcode_modifier.broadcast - 1;
5643       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
5644         i.memshift = t->opcode_modifier.disp8memshift;
5645       else
5646         {
5647           const i386_operand_type *type = NULL;
5648
5649           i.memshift = 0;
5650           for (op = 0; op < i.operands; op++)
5651             if (i.flags[op] & Operand_Mem)
5652               {
5653                 if (t->opcode_modifier.evex == EVEXLIG)
5654                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
5655                 else if (t->operand_types[op].bitfield.xmmword
5656                          + t->operand_types[op].bitfield.ymmword
5657                          + t->operand_types[op].bitfield.zmmword <= 1)
5658                   type = &t->operand_types[op];
5659                 else if (!i.types[op].bitfield.unspecified)
5660                   type = &i.types[op];
5661               }
5662             else if (i.types[op].bitfield.class == RegSIMD
5663                      && t->opcode_modifier.evex != EVEXLIG)
5664               {
5665                 if (i.types[op].bitfield.zmmword)
5666                   i.memshift = 6;
5667                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
5668                   i.memshift = 5;
5669                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
5670                   i.memshift = 4;
5671               }
5672
5673           if (type)
5674             {
5675               if (type->bitfield.zmmword)
5676                 i.memshift = 6;
5677               else if (type->bitfield.ymmword)
5678                 i.memshift = 5;
5679               else if (type->bitfield.xmmword)
5680                 i.memshift = 4;
5681             }
5682
5683           /* For the check in fits_in_disp8().  */
5684           if (i.memshift == 0)
5685             i.memshift = -1;
5686         }
5687
5688       for (op = 0; op < i.operands; op++)
5689         if (operand_type_check (i.types[op], disp)
5690             && i.op[op].disps->X_op == O_constant)
5691           {
5692             if (fits_in_disp8 (i.op[op].disps->X_add_number))
5693               {
5694                 i.types[op].bitfield.disp8 = 1;
5695                 return 0;
5696               }
5697             i.types[op].bitfield.disp8 = 0;
5698           }
5699     }
5700
5701   i.memshift = 0;
5702
5703   return 0;
5704 }
5705
5706 /* Check if operands are valid for the instruction.  Update VEX
5707    operand types.  */
5708
5709 static int
5710 VEX_check_operands (const insn_template *t)
5711 {
5712   if (i.vec_encoding == vex_encoding_evex)
5713     {
5714       /* This instruction must be encoded with EVEX prefix.  */
5715       if (!is_evex_encoding (t))
5716         {
5717           i.error = unsupported;
5718           return 1;
5719         }
5720       return 0;
5721     }
5722
5723   if (!t->opcode_modifier.vex)
5724     {
5725       /* This instruction template doesn't have VEX prefix.  */
5726       if (i.vec_encoding != vex_encoding_default)
5727         {
5728           i.error = unsupported;
5729           return 1;
5730         }
5731       return 0;
5732     }
5733
5734   /* Check the special Imm4 cases; must be the first operand.  */
5735   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
5736     {
5737       if (i.op[0].imms->X_op != O_constant
5738           || !fits_in_imm4 (i.op[0].imms->X_add_number))
5739         {
5740           i.error = bad_imm4;
5741           return 1;
5742         }
5743
5744       /* Turn off Imm<N> so that update_imm won't complain.  */
5745       operand_type_set (&i.types[0], 0);
5746     }
5747
5748   return 0;
5749 }
5750
5751 static const insn_template *
5752 match_template (char mnem_suffix)
5753 {
5754   /* Points to template once we've found it.  */
5755   const insn_template *t;
5756   i386_operand_type overlap0, overlap1, overlap2, overlap3;
5757   i386_operand_type overlap4;
5758   unsigned int found_reverse_match;
5759   i386_opcode_modifier suffix_check;
5760   i386_operand_type operand_types [MAX_OPERANDS];
5761   int addr_prefix_disp;
5762   unsigned int j, size_match, check_register;
5763   enum i386_error specific_error = 0;
5764
5765 #if MAX_OPERANDS != 5
5766 # error "MAX_OPERANDS must be 5."
5767 #endif
5768
5769   found_reverse_match = 0;
5770   addr_prefix_disp = -1;
5771
5772   /* Prepare for mnemonic suffix check.  */
5773   memset (&suffix_check, 0, sizeof (suffix_check));
5774   switch (mnem_suffix)
5775     {
5776     case BYTE_MNEM_SUFFIX:
5777       suffix_check.no_bsuf = 1;
5778       break;
5779     case WORD_MNEM_SUFFIX:
5780       suffix_check.no_wsuf = 1;
5781       break;
5782     case SHORT_MNEM_SUFFIX:
5783       suffix_check.no_ssuf = 1;
5784       break;
5785     case LONG_MNEM_SUFFIX:
5786       suffix_check.no_lsuf = 1;
5787       break;
5788     case QWORD_MNEM_SUFFIX:
5789       suffix_check.no_qsuf = 1;
5790       break;
5791     default:
5792       /* NB: In Intel syntax, normally we can check for memory operand
5793          size when there is no mnemonic suffix.  But jmp and call have
5794          2 different encodings with Dword memory operand size, one with
5795          No_ldSuf and the other without.  i.suffix is set to
5796          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
5797       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
5798         suffix_check.no_ldsuf = 1;
5799     }
5800
5801   /* Must have right number of operands.  */
5802   i.error = number_of_operands_mismatch;
5803
5804   for (t = current_templates->start; t < current_templates->end; t++)
5805     {
5806       addr_prefix_disp = -1;
5807       found_reverse_match = 0;
5808
5809       if (i.operands != t->operands)
5810         continue;
5811
5812       /* Check processor support.  */
5813       i.error = unsupported;
5814       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
5815         continue;
5816
5817       /* Check AT&T mnemonic.   */
5818       i.error = unsupported_with_intel_mnemonic;
5819       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
5820         continue;
5821
5822       /* Check AT&T/Intel syntax.  */
5823       i.error = unsupported_syntax;
5824       if ((intel_syntax && t->opcode_modifier.attsyntax)
5825           || (!intel_syntax && t->opcode_modifier.intelsyntax))
5826         continue;
5827
5828       /* Check Intel64/AMD64 ISA.   */
5829       switch (isa64)
5830         {
5831         default:
5832           /* Default: Don't accept Intel64.  */
5833           if (t->opcode_modifier.isa64 == INTEL64)
5834             continue;
5835           break;
5836         case amd64:
5837           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
5838           if (t->opcode_modifier.isa64 >= INTEL64)
5839             continue;
5840           break;
5841         case intel64:
5842           /* -mintel64: Don't accept AMD64.  */
5843           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
5844             continue;
5845           break;
5846         }
5847
5848       /* Check the suffix.  */
5849       i.error = invalid_instruction_suffix;
5850       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
5851           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
5852           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
5853           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
5854           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
5855           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
5856         continue;
5857
5858       size_match = operand_size_match (t);
5859       if (!size_match)
5860         continue;
5861
5862       /* This is intentionally not
5863
5864          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
5865
5866          as the case of a missing * on the operand is accepted (perhaps with
5867          a warning, issued further down).  */
5868       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
5869         {
5870           i.error = operand_type_mismatch;
5871           continue;
5872         }
5873
5874       for (j = 0; j < MAX_OPERANDS; j++)
5875         operand_types[j] = t->operand_types[j];
5876
5877       /* In general, don't allow 64-bit operands in 32-bit mode.  */
5878       if (i.suffix == QWORD_MNEM_SUFFIX
5879           && flag_code != CODE_64BIT
5880           && (intel_syntax
5881               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
5882                  && !t->opcode_modifier.broadcast
5883                  && !intel_float_operand (t->name))
5884               : intel_float_operand (t->name) != 2)
5885           && ((operand_types[0].bitfield.class != RegMMX
5886                && operand_types[0].bitfield.class != RegSIMD)
5887               || (operand_types[t->operands > 1].bitfield.class != RegMMX
5888                   && operand_types[t->operands > 1].bitfield.class != RegSIMD))
5889           && (t->base_opcode != 0x0fc7
5890               || t->extension_opcode != 1 /* cmpxchg8b */))
5891         continue;
5892
5893       /* In general, don't allow 32-bit operands on pre-386.  */
5894       else if (i.suffix == LONG_MNEM_SUFFIX
5895                && !cpu_arch_flags.bitfield.cpui386
5896                && (intel_syntax
5897                    ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
5898                       && !intel_float_operand (t->name))
5899                    : intel_float_operand (t->name) != 2)
5900                && ((operand_types[0].bitfield.class != RegMMX
5901                     && operand_types[0].bitfield.class != RegSIMD)
5902                    || (operand_types[t->operands > 1].bitfield.class != RegMMX
5903                        && operand_types[t->operands > 1].bitfield.class
5904                           != RegSIMD)))
5905         continue;
5906
5907       /* Do not verify operands when there are none.  */
5908       else
5909         {
5910           if (!t->operands)
5911             /* We've found a match; break out of loop.  */
5912             break;
5913         }
5914
5915       if (!t->opcode_modifier.jump
5916           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
5917         {
5918           /* There should be only one Disp operand.  */
5919           for (j = 0; j < MAX_OPERANDS; j++)
5920             if (operand_type_check (operand_types[j], disp))
5921               break;
5922           if (j < MAX_OPERANDS)
5923             {
5924               bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
5925
5926               addr_prefix_disp = j;
5927
5928               /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
5929                  operand into Disp32/Disp32/Disp16/Disp32 operand.  */
5930               switch (flag_code)
5931                 {
5932                 case CODE_16BIT:
5933                   override = !override;
5934                   /* Fall through.  */
5935                 case CODE_32BIT:
5936                   if (operand_types[j].bitfield.disp32
5937                       && operand_types[j].bitfield.disp16)
5938                     {
5939                       operand_types[j].bitfield.disp16 = override;
5940                       operand_types[j].bitfield.disp32 = !override;
5941                     }
5942                   operand_types[j].bitfield.disp32s = 0;
5943                   operand_types[j].bitfield.disp64 = 0;
5944                   break;
5945
5946                 case CODE_64BIT:
5947                   if (operand_types[j].bitfield.disp32s
5948                       || operand_types[j].bitfield.disp64)
5949                     {
5950                       operand_types[j].bitfield.disp64 &= !override;
5951                       operand_types[j].bitfield.disp32s &= !override;
5952                       operand_types[j].bitfield.disp32 = override;
5953                     }
5954                   operand_types[j].bitfield.disp16 = 0;
5955                   break;
5956                 }
5957             }
5958         }
5959
5960       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
5961       if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
5962         continue;
5963
5964       /* We check register size if needed.  */
5965       if (t->opcode_modifier.checkregsize)
5966         {
5967           check_register = (1 << t->operands) - 1;
5968           if (i.broadcast)
5969             check_register &= ~(1 << i.broadcast->operand);
5970         }
5971       else
5972         check_register = 0;
5973
5974       overlap0 = operand_type_and (i.types[0], operand_types[0]);
5975       switch (t->operands)
5976         {
5977         case 1:
5978           if (!operand_type_match (overlap0, i.types[0]))
5979             continue;
5980           break;
5981         case 2:
5982           /* xchg %eax, %eax is a special case. It is an alias for nop
5983              only in 32bit mode and we can use opcode 0x90.  In 64bit
5984              mode, we can't use 0x90 for xchg %eax, %eax since it should
5985              zero-extend %eax to %rax.  */
5986           if (flag_code == CODE_64BIT
5987               && t->base_opcode == 0x90
5988               && i.types[0].bitfield.instance == Accum
5989               && i.types[0].bitfield.dword
5990               && i.types[1].bitfield.instance == Accum
5991               && i.types[1].bitfield.dword)
5992             continue;
5993           /* xrelease mov %eax, <disp> is another special case. It must not
5994              match the accumulator-only encoding of mov.  */
5995           if (flag_code != CODE_64BIT
5996               && i.hle_prefix
5997               && t->base_opcode == 0xa0
5998               && i.types[0].bitfield.instance == Accum
5999               && (i.flags[1] & Operand_Mem))
6000             continue;
6001           /* Fall through.  */
6002
6003         case 3:
6004           if (!(size_match & MATCH_STRAIGHT))
6005             goto check_reverse;
6006           /* Reverse direction of operands if swapping is possible in the first
6007              place (operands need to be symmetric) and
6008              - the load form is requested, and the template is a store form,
6009              - the store form is requested, and the template is a load form,
6010              - the non-default (swapped) form is requested.  */
6011           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6012           if (t->opcode_modifier.d && i.reg_operands == i.operands
6013               && !operand_type_all_zero (&overlap1))
6014             switch (i.dir_encoding)
6015               {
6016               case dir_encoding_load:
6017                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6018                     || t->opcode_modifier.regmem)
6019                   goto check_reverse;
6020                 break;
6021
6022               case dir_encoding_store:
6023                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6024                     && !t->opcode_modifier.regmem)
6025                   goto check_reverse;
6026                 break;
6027
6028               case dir_encoding_swap:
6029                 goto check_reverse;
6030
6031               case dir_encoding_default:
6032                 break;
6033               }
6034           /* If we want store form, we skip the current load.  */
6035           if ((i.dir_encoding == dir_encoding_store
6036                || i.dir_encoding == dir_encoding_swap)
6037               && i.mem_operands == 0
6038               && t->opcode_modifier.load)
6039             continue;
6040           /* Fall through.  */
6041         case 4:
6042         case 5:
6043           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6044           if (!operand_type_match (overlap0, i.types[0])
6045               || !operand_type_match (overlap1, i.types[1])
6046               || ((check_register & 3) == 3
6047                   && !operand_type_register_match (i.types[0],
6048                                                    operand_types[0],
6049                                                    i.types[1],
6050                                                    operand_types[1])))
6051             {
6052               /* Check if other direction is valid ...  */
6053               if (!t->opcode_modifier.d)
6054                 continue;
6055
6056             check_reverse:
6057               if (!(size_match & MATCH_REVERSE))
6058                 continue;
6059               /* Try reversing direction of operands.  */
6060               overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6061               overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6062               if (!operand_type_match (overlap0, i.types[0])
6063                   || !operand_type_match (overlap1, i.types[i.operands - 1])
6064                   || (check_register
6065                       && !operand_type_register_match (i.types[0],
6066                                                        operand_types[i.operands - 1],
6067                                                        i.types[i.operands - 1],
6068                                                        operand_types[0])))
6069                 {
6070                   /* Does not match either direction.  */
6071                   continue;
6072                 }
6073               /* found_reverse_match holds which of D or FloatR
6074                  we've found.  */
6075               if (!t->opcode_modifier.d)
6076                 found_reverse_match = 0;
6077               else if (operand_types[0].bitfield.tbyte)
6078                 found_reverse_match = Opcode_FloatD;
6079               else if (operand_types[0].bitfield.xmmword
6080                        || operand_types[i.operands - 1].bitfield.xmmword
6081                        || operand_types[0].bitfield.class == RegMMX
6082                        || operand_types[i.operands - 1].bitfield.class == RegMMX
6083                        || is_any_vex_encoding(t))
6084                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6085                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6086               else
6087                 found_reverse_match = Opcode_D;
6088               if (t->opcode_modifier.floatr)
6089                 found_reverse_match |= Opcode_FloatR;
6090             }
6091           else
6092             {
6093               /* Found a forward 2 operand match here.  */
6094               switch (t->operands)
6095                 {
6096                 case 5:
6097                   overlap4 = operand_type_and (i.types[4],
6098                                                operand_types[4]);
6099                   /* Fall through.  */
6100                 case 4:
6101                   overlap3 = operand_type_and (i.types[3],
6102                                                operand_types[3]);
6103                   /* Fall through.  */
6104                 case 3:
6105                   overlap2 = operand_type_and (i.types[2],
6106                                                operand_types[2]);
6107                   break;
6108                 }
6109
6110               switch (t->operands)
6111                 {
6112                 case 5:
6113                   if (!operand_type_match (overlap4, i.types[4])
6114                       || !operand_type_register_match (i.types[3],
6115                                                        operand_types[3],
6116                                                        i.types[4],
6117                                                        operand_types[4]))
6118                     continue;
6119                   /* Fall through.  */
6120                 case 4:
6121                   if (!operand_type_match (overlap3, i.types[3])
6122                       || ((check_register & 0xa) == 0xa
6123                           && !operand_type_register_match (i.types[1],
6124                                                             operand_types[1],
6125                                                             i.types[3],
6126                                                             operand_types[3]))
6127                       || ((check_register & 0xc) == 0xc
6128                           && !operand_type_register_match (i.types[2],
6129                                                             operand_types[2],
6130                                                             i.types[3],
6131                                                             operand_types[3])))
6132                     continue;
6133                   /* Fall through.  */
6134                 case 3:
6135                   /* Here we make use of the fact that there are no
6136                      reverse match 3 operand instructions.  */
6137                   if (!operand_type_match (overlap2, i.types[2])
6138                       || ((check_register & 5) == 5
6139                           && !operand_type_register_match (i.types[0],
6140                                                             operand_types[0],
6141                                                             i.types[2],
6142                                                             operand_types[2]))
6143                       || ((check_register & 6) == 6
6144                           && !operand_type_register_match (i.types[1],
6145                                                             operand_types[1],
6146                                                             i.types[2],
6147                                                             operand_types[2])))
6148                     continue;
6149                   break;
6150                 }
6151             }
6152           /* Found either forward/reverse 2, 3 or 4 operand match here:
6153              slip through to break.  */
6154         }
6155
6156       /* Check if vector and VEX operands are valid.  */
6157       if (check_VecOperands (t) || VEX_check_operands (t))
6158         {
6159           specific_error = i.error;
6160           continue;
6161         }
6162
6163       /* We've found a match; break out of loop.  */
6164       break;
6165     }
6166
6167   if (t == current_templates->end)
6168     {
6169       /* We found no match.  */
6170       const char *err_msg;
6171       switch (specific_error ? specific_error : i.error)
6172         {
6173         default:
6174           abort ();
6175         case operand_size_mismatch:
6176           err_msg = _("operand size mismatch");
6177           break;
6178         case operand_type_mismatch:
6179           err_msg = _("operand type mismatch");
6180           break;
6181         case register_type_mismatch:
6182           err_msg = _("register type mismatch");
6183           break;
6184         case number_of_operands_mismatch:
6185           err_msg = _("number of operands mismatch");
6186           break;
6187         case invalid_instruction_suffix:
6188           err_msg = _("invalid instruction suffix");
6189           break;
6190         case bad_imm4:
6191           err_msg = _("constant doesn't fit in 4 bits");
6192           break;
6193         case unsupported_with_intel_mnemonic:
6194           err_msg = _("unsupported with Intel mnemonic");
6195           break;
6196         case unsupported_syntax:
6197           err_msg = _("unsupported syntax");
6198           break;
6199         case unsupported:
6200           as_bad (_("unsupported instruction `%s'"),
6201                   current_templates->start->name);
6202           return NULL;
6203         case invalid_vsib_address:
6204           err_msg = _("invalid VSIB address");
6205           break;
6206         case invalid_vector_register_set:
6207           err_msg = _("mask, index, and destination registers must be distinct");
6208           break;
6209         case unsupported_vector_index_register:
6210           err_msg = _("unsupported vector index register");
6211           break;
6212         case unsupported_broadcast:
6213           err_msg = _("unsupported broadcast");
6214           break;
6215         case broadcast_needed:
6216           err_msg = _("broadcast is needed for operand of such type");
6217           break;
6218         case unsupported_masking:
6219           err_msg = _("unsupported masking");
6220           break;
6221         case mask_not_on_destination:
6222           err_msg = _("mask not on destination operand");
6223           break;
6224         case no_default_mask:
6225           err_msg = _("default mask isn't allowed");
6226           break;
6227         case unsupported_rc_sae:
6228           err_msg = _("unsupported static rounding/sae");
6229           break;
6230         case rc_sae_operand_not_last_imm:
6231           if (intel_syntax)
6232             err_msg = _("RC/SAE operand must precede immediate operands");
6233           else
6234             err_msg = _("RC/SAE operand must follow immediate operands");
6235           break;
6236         case invalid_register_operand:
6237           err_msg = _("invalid register operand");
6238           break;
6239         }
6240       as_bad (_("%s for `%s'"), err_msg,
6241               current_templates->start->name);
6242       return NULL;
6243     }
6244
6245   if (!quiet_warnings)
6246     {
6247       if (!intel_syntax
6248           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6249         as_warn (_("indirect %s without `*'"), t->name);
6250
6251       if (t->opcode_modifier.isprefix
6252           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6253         {
6254           /* Warn them that a data or address size prefix doesn't
6255              affect assembly of the next line of code.  */
6256           as_warn (_("stand-alone `%s' prefix"), t->name);
6257         }
6258     }
6259
6260   /* Copy the template we found.  */
6261   i.tm = *t;
6262
6263   if (addr_prefix_disp != -1)
6264     i.tm.operand_types[addr_prefix_disp]
6265       = operand_types[addr_prefix_disp];
6266
6267   if (found_reverse_match)
6268     {
6269       /* If we found a reverse match we must alter the opcode direction
6270          bit and clear/flip the regmem modifier one.  found_reverse_match
6271          holds bits to change (different for int & float insns).  */
6272
6273       i.tm.base_opcode ^= found_reverse_match;
6274
6275       i.tm.operand_types[0] = operand_types[i.operands - 1];
6276       i.tm.operand_types[i.operands - 1] = operand_types[0];
6277
6278       /* Certain SIMD insns have their load forms specified in the opcode
6279          table, and hence we need to _set_ RegMem instead of clearing it.
6280          We need to avoid setting the bit though on insns like KMOVW.  */
6281       i.tm.opcode_modifier.regmem
6282         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6283           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6284           && !i.tm.opcode_modifier.regmem;
6285     }
6286
6287   return t;
6288 }
6289
6290 static int
6291 check_string (void)
6292 {
6293   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6294   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6295
6296   if (i.seg[op] != NULL && i.seg[op] != &es)
6297     {
6298       as_bad (_("`%s' operand %u must use `%ses' segment"),
6299               i.tm.name,
6300               intel_syntax ? i.tm.operands - es_op : es_op + 1,
6301               register_prefix);
6302       return 0;
6303     }
6304
6305   /* There's only ever one segment override allowed per instruction.
6306      This instruction possibly has a legal segment override on the
6307      second operand, so copy the segment to where non-string
6308      instructions store it, allowing common code.  */
6309   i.seg[op] = i.seg[1];
6310
6311   return 1;
6312 }
6313
/* Settle the operand size of the instruction matched into i.tm.

   First works out i.suffix: from i.tm.opcode_modifier.size, from the
   general register operands, from stackop_size, or from flag_code.
   Operand sizes that remain ambiguous are diagnosed (error, warning or
   silent default, depending on operand_check and the syntax in use).
   The result is then applied: i.tm.base_opcode is adjusted, a data or
   address size prefix may be added through add_prefix (), and REX_W
   may be set in i.rex.  Templates flagged addrprefixopreg either get an
   address size prefix added or have their register operand sizes
   validated.

   Returns 1 on success.  Returns 0 after reporting an error with
   as_bad (), or when check_byte_reg (), check_word_reg (),
   check_long_reg (), check_qword_reg () or add_prefix () returned 0.  */
6314 static int
6315 process_suffix (void)
6316 {
6317   /* If matched instruction specifies an explicit instruction mnemonic
6318      suffix, use it.  */
6319   if (i.tm.opcode_modifier.size == SIZE16)
6320     i.suffix = WORD_MNEM_SUFFIX;
6321   else if (i.tm.opcode_modifier.size == SIZE32)
6322     i.suffix = LONG_MNEM_SUFFIX;
6323   else if (i.tm.opcode_modifier.size == SIZE64)
6324     i.suffix = QWORD_MNEM_SUFFIX;
6325   else if (i.reg_operands
6326            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
6327            && !i.tm.opcode_modifier.addrprefixopreg)
6328     {
           /* NUMOP remembers the real operand count.  It is written back
              only at the end of this block, so the `return 0' paths below
              leave i.operands as (possibly) decremented here.  */
6329       unsigned int numop = i.operands;
6330
6331       /* movsx/movzx want only their source operand considered here, for the
6332          ambiguity checking below.  The suffix will be replaced afterwards
6333          to represent the destination (register).  */
6334       if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
6335           || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
6336         --i.operands;
6337
6338       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
6339       if (i.tm.base_opcode == 0xf20f38f0
6340           && i.tm.operand_types[1].bitfield.qword)
6341         i.rex |= REX_W;
6342
6343       /* If there's no instruction mnemonic suffix we try to invent one
6344          based on GPR operands.  */
6345       if (!i.suffix)
6346         {
6347           /* We take i.suffix from the last register operand specified,
6348              Destination register type is more significant than source
6349              register type.  crc32 in SSE4.2 prefers source register
6350              type. */
6351           unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1;
6352
               /* Walk the operands from last to first and stop at the first
                  general register whose size bit is recognised.  Operands
                  whose template instance is neither InstanceNone nor Accum
                  are passed over.  */
6353           while (op--)
6354             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
6355                 || i.tm.operand_types[op].bitfield.instance == Accum)
6356               {
6357                 if (i.types[op].bitfield.class != Reg)
6358                   continue;
6359                 if (i.types[op].bitfield.byte)
6360                   i.suffix = BYTE_MNEM_SUFFIX;
6361                 else if (i.types[op].bitfield.word)
6362                   i.suffix = WORD_MNEM_SUFFIX;
6363                 else if (i.types[op].bitfield.dword)
6364                   i.suffix = LONG_MNEM_SUFFIX;
6365                 else if (i.types[op].bitfield.qword)
6366                   i.suffix = QWORD_MNEM_SUFFIX;
6367                 else
6368                   continue;
6369                 break;
6370               }
6371
6372           /* As an exception, movsx/movzx silently default to a byte source
6373              in AT&T mode.  */
6374           if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
6375               && !i.suffix && !intel_syntax)
6376             i.suffix = BYTE_MNEM_SUFFIX;
6377         }
           /* An explicit suffix was given.  In Intel syntax it is dropped
              when the template ignores operand size and does not accept
              that suffix; otherwise the register operands are checked
              against it.  */
6378       else if (i.suffix == BYTE_MNEM_SUFFIX)
6379         {
6380           if (intel_syntax
6381               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6382               && i.tm.opcode_modifier.no_bsuf)
6383             i.suffix = 0;
6384           else if (!check_byte_reg ())
6385             return 0;
6386         }
6387       else if (i.suffix == LONG_MNEM_SUFFIX)
6388         {
6389           if (intel_syntax
6390               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6391               && i.tm.opcode_modifier.no_lsuf
6392               && !i.tm.opcode_modifier.todword
6393               && !i.tm.opcode_modifier.toqword)
6394             i.suffix = 0;
6395           else if (!check_long_reg ())
6396             return 0;
6397         }
6398       else if (i.suffix == QWORD_MNEM_SUFFIX)
6399         {
6400           if (intel_syntax
6401               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6402               && i.tm.opcode_modifier.no_qsuf
6403               && !i.tm.opcode_modifier.todword
6404               && !i.tm.opcode_modifier.toqword)
6405             i.suffix = 0;
6406           else if (!check_qword_reg ())
6407             return 0;
6408         }
6409       else if (i.suffix == WORD_MNEM_SUFFIX)
6410         {
6411           if (intel_syntax
6412               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6413               && i.tm.opcode_modifier.no_wsuf)
6414             i.suffix = 0;
6415           else if (!check_word_reg ())
6416             return 0;
6417         }
6418       else if (intel_syntax
6419                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
6420         /* Do nothing if the instruction is going to ignore the prefix.  */
6421         ;
6422       else
6423         abort ();
6424
6425       /* Undo the movsx/movzx change done above.  */
6426       i.operands = numop;
6427     }
6428   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
6429            && !i.suffix)
6430     {
6431       i.suffix = stackop_size;
6432       if (stackop_size == LONG_MNEM_SUFFIX)
6433         {
6434           /* stackop_size is set to LONG_MNEM_SUFFIX for the
6435              .code16gcc directive to support 16-bit mode with
6436              32-bit address.  For IRET without a suffix, generate
6437              16-bit IRET (opcode 0xcf) to return from an interrupt
6438              handler.  */
6439           if (i.tm.base_opcode == 0xcf)
6440             {
6441               i.suffix = WORD_MNEM_SUFFIX;
6442               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
6443             }
6444           /* Warn about changed behavior for segment register push/pop.  */
6445           else if ((i.tm.base_opcode | 1) == 0x07)
6446             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
6447                      i.tm.name);
6448         }
6449     }
6450   else if (!i.suffix
6451            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
6452                || i.tm.opcode_modifier.jump == JUMP_BYTE
6453                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
6454                || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
6455                    && i.tm.extension_opcode <= 3)))
6456     {
           /* Default to the suffix that corresponds to the current code
              size, provided the template accepts it.  In 64-bit code a
              template with no_qsuf falls through to the 32-bit choice.  */
6457       switch (flag_code)
6458         {
6459         case CODE_64BIT:
6460           if (!i.tm.opcode_modifier.no_qsuf)
6461             {
6462               i.suffix = QWORD_MNEM_SUFFIX;
6463               break;
6464             }
6465           /* Fall through.  */
6466         case CODE_32BIT:
6467           if (!i.tm.opcode_modifier.no_lsuf)
6468             i.suffix = LONG_MNEM_SUFFIX;
6469           break;
6470         case CODE_16BIT:
6471           if (!i.tm.opcode_modifier.no_wsuf)
6472             i.suffix = WORD_MNEM_SUFFIX;
6473           break;
6474         }
6475     }
6476
       /* Still no suffix: find out whether the operand size is ambiguous
          and, if it is, diagnose it and/or pick a default.  */
6477   if (!i.suffix
6478       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
6479           /* Also cover lret/retf/iret in 64-bit mode.  */
6480           || (flag_code == CODE_64BIT
6481               && !i.tm.opcode_modifier.no_lsuf
6482               && !i.tm.opcode_modifier.no_qsuf))
6483       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
6484       /* Accept FLDENV et al without suffix.  */
6485       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
6486     {
6487       unsigned int suffixes, evex = 0;
6488
           /* SUFFIXES is a bit mask of the sizes the template permits.
              Bits 0..5 are set when no_bsuf, no_wsuf, no_lsuf, no_ldsuf,
              no_ssuf and (in 64-bit code only) no_qsuf respectively are
              clear.  Bits 6..8 may be added below for xmmword, ymmword and
              zmmword memory operands.  */
6489       suffixes = !i.tm.opcode_modifier.no_bsuf;
6490       if (!i.tm.opcode_modifier.no_wsuf)
6491         suffixes |= 1 << 1;
6492       if (!i.tm.opcode_modifier.no_lsuf)
6493         suffixes |= 1 << 2;
6494       if (!i.tm.opcode_modifier.no_ldsuf)
6495         suffixes |= 1 << 3;
6496       if (!i.tm.opcode_modifier.no_ssuf)
6497         suffixes |= 1 << 4;
6498       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
6499         suffixes |= 1 << 5;
6500
6501       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
6502          also suitable for AT&T syntax mode, it was requested that this be
6503          restricted to just Intel syntax.  */
6504       if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
6505         {
6506           unsigned int op;
6507
6508           for (op = 0; op < i.tm.operands; ++op)
6509             {
                   /* When cpuavx512vl is not among the enabled CPU flags,
                      narrow an EVEX template operand by clearing the
                      smaller of two adjacent vector sizes, and force
                      EVEX512 if the template leaves the vector length
                      unset or dynamic.  */
6510               if (is_evex_encoding (&i.tm)
6511                   && !cpu_arch_flags.bitfield.cpuavx512vl)
6512                 {
6513                   if (i.tm.operand_types[op].bitfield.ymmword)
6514                     i.tm.operand_types[op].bitfield.xmmword = 0;
6515                   if (i.tm.operand_types[op].bitfield.zmmword)
6516                     i.tm.operand_types[op].bitfield.ymmword = 0;
6517                   if (!i.tm.opcode_modifier.evex
6518                       || i.tm.opcode_modifier.evex == EVEXDYN)
6519                     i.tm.opcode_modifier.evex = EVEX512;
6520                 }
6521
                   /* Template operands permitting fewer than two vector
                      sizes cannot be a source of ambiguity.  */
6522               if (i.tm.operand_types[op].bitfield.xmmword
6523                   + i.tm.operand_types[op].bitfield.ymmword
6524                   + i.tm.operand_types[op].bitfield.zmmword < 2)
6525                 continue;
6526
6527               /* Any properly sized operand disambiguates the insn.  */
6528               if (i.types[op].bitfield.xmmword
6529                   || i.types[op].bitfield.ymmword
6530                   || i.types[op].bitfield.zmmword)
6531                 {
6532                   suffixes &= ~(7 << 6);
6533                   evex = 0;
6534                   break;
6535                 }
6536
6537               if ((i.flags[op] & Operand_Mem)
6538                   && i.tm.operand_types[op].bitfield.unspecified)
6539                 {
6540                   if (i.tm.operand_types[op].bitfield.xmmword)
6541                     suffixes |= 1 << 6;
6542                   if (i.tm.operand_types[op].bitfield.ymmword)
6543                     suffixes |= 1 << 7;
6544                   if (i.tm.operand_types[op].bitfield.zmmword)
6545                     suffixes |= 1 << 8;
6546                   if (is_evex_encoding (&i.tm))
6547                     evex = EVEX512;
6548                 }
6549             }
6550         }
6551
6552       /* Are multiple suffixes / operand sizes allowed?  */
           /* (x & (x - 1) is non-zero exactly when more than one bit of x
              is set.)  */
6553       if (suffixes & (suffixes - 1))
6554         {
6555           if (intel_syntax
6556               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
6557                   || operand_check == check_error))
6558             {
6559               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
6560               return 0;
6561             }
6562           if (operand_check == check_error)
6563             {
6564               as_bad (_("no instruction mnemonic suffix given and "
6565                         "no register operands; can't size `%s'"), i.tm.name);
6566               return 0;
6567             }
6568           if (operand_check == check_warning)
6569             as_warn (_("%s; using default for `%s'"),
6570                        intel_syntax
6571                        ? _("ambiguous operand size")
6572                        : _("no instruction mnemonic suffix given and "
6573                            "no register operands"),
6574                        i.tm.name);
6575
               /* Not treated as an error: choose the default size.  */
6576           if (i.tm.opcode_modifier.floatmf)
6577             i.suffix = SHORT_MNEM_SUFFIX;
6578           else if ((i.tm.base_opcode | 8) == 0xfbe
6579                    || (i.tm.base_opcode == 0x63
6580                        && i.tm.cpu_flags.bitfield.cpu64))
6581             /* handled below */;
6582           else if (evex)
6583             i.tm.opcode_modifier.evex = evex;
6584           else if (flag_code == CODE_16BIT)
6585             i.suffix = WORD_MNEM_SUFFIX;
6586           else if (!i.tm.opcode_modifier.no_lsuf)
6587             i.suffix = LONG_MNEM_SUFFIX;
6588           else
6589             i.suffix = QWORD_MNEM_SUFFIX;
6590         }
6591     }
6592
6593   if ((i.tm.base_opcode | 8) == 0xfbe
6594       || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
6595     {
6596       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
6597          In AT&T syntax, if there is no suffix (warned about above), the default
6598          will be byte extension.  */
6599       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
6600         i.tm.base_opcode |= 1;
6601
6602       /* For further processing, the suffix should represent the destination
6603          (register).  This is already the case when one was used with
6604          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
6605          no suffix to begin with.  */
6606       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
6607         {
6608           if (i.types[1].bitfield.word)
6609             i.suffix = WORD_MNEM_SUFFIX;
6610           else if (i.types[1].bitfield.qword)
6611             i.suffix = QWORD_MNEM_SUFFIX;
6612           else
6613             i.suffix = LONG_MNEM_SUFFIX;
6614
               /* Clear W so the suffix switch below does not adjust the
                  opcode a second time.  */
6615           i.tm.opcode_modifier.w = 0;
6616         }
6617     }
6618
       /* For templates without ModRM, with register operands and fewer
          than three template operands, i.short_form is set when exactly
          one of the first two template operands has class Reg.  */
6619   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
6620     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
6621                    != (i.tm.operand_types[1].bitfield.class == Reg);
6622
6623   /* Change the opcode based on the operand size given by i.suffix.  */
6624   switch (i.suffix)
6625     {
6626     /* Size floating point instruction.  */
6627     case LONG_MNEM_SUFFIX:
6628       if (i.tm.opcode_modifier.floatmf)
6629         {
6630           i.tm.base_opcode ^= 4;
6631           break;
6632         }
6633     /* fall through */
6634     case WORD_MNEM_SUFFIX:
6635     case QWORD_MNEM_SUFFIX:
6636       /* It's not a byte, select word/dword operation.  */
6637       if (i.tm.opcode_modifier.w)
6638         {
6639           if (i.short_form)
6640             i.tm.base_opcode |= 8;
6641           else
6642             i.tm.base_opcode |= 1;
6643         }
6644     /* fall through */
6645     case SHORT_MNEM_SUFFIX:
6646       /* Now select between word & dword operations via the operand
6647          size prefix, except for instructions that will ignore this
6648          prefix anyway.  */
           /* The `==' comparison of the two tests below is true for
              LONG_MNEM_SUFFIX in 16-bit code and for any other suffix
              reaching this point outside 16-bit code.  */
6649       if (i.suffix != QWORD_MNEM_SUFFIX
6650           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
6651           && !i.tm.opcode_modifier.floatmf
6652           && !is_any_vex_encoding (&i.tm)
6653           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
6654               || (flag_code == CODE_64BIT
6655                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
6656         {
6657           unsigned int prefix = DATA_PREFIX_OPCODE;
6658
6659           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
6660             prefix = ADDR_PREFIX_OPCODE;
6661
6662           if (!add_prefix (prefix))
6663             return 0;
6664         }
6665
6666       /* Set mode64 for an operand.  */
6667       if (i.suffix == QWORD_MNEM_SUFFIX
6668           && flag_code == CODE_64BIT
6669           && !i.tm.opcode_modifier.norex64
6670           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
6671              need rex64. */
6672           && ! (i.operands == 2
6673                 && i.tm.base_opcode == 0x90
6674                 && i.tm.extension_opcode == None
6675                 && i.types[0].bitfield.instance == Accum
6676                 && i.types[0].bitfield.qword
6677                 && i.types[1].bitfield.instance == Accum
6678                 && i.types[1].bitfield.qword))
6679         i.rex |= REX_W;
6680
6681       break;
6682     }
6683
6684   if (i.tm.opcode_modifier.addrprefixopreg)
6685     {
6686       gas_assert (!i.suffix);
6687       gas_assert (i.reg_operands);
6688
6689       if (i.tm.operand_types[0].bitfield.instance == Accum
6690           || i.operands == 1)
6691         {
6692           /* The address size override prefix changes the size of the
6693              first operand.  */
6694           if (flag_code == CODE_64BIT
6695               && i.op[0].regs->reg_type.bitfield.word)
6696             {
6697               as_bad (_("16-bit addressing unavailable for `%s'"),
6698                       i.tm.name);
6699               return 0;
6700             }
6701
               /* Add the prefix for a word register in 32-bit code, or for
                  a dword register in 16- or 64-bit code.  */
6702           if ((flag_code == CODE_32BIT
6703                ? i.op[0].regs->reg_type.bitfield.word
6704                : i.op[0].regs->reg_type.bitfield.dword)
6705               && !add_prefix (ADDR_PREFIX_OPCODE))
6706             return 0;
6707         }
6708       else
6709         {
6710           /* Check invalid register operand when the address size override
6711              prefix changes the size of register operands.  */
6712           unsigned int op;
6713           enum { need_word, need_dword, need_qword } need;
6714
               /* Required register width: in 32-bit code dword, or word
                  when an address size prefix is present; elsewhere dword
                  with the prefix, otherwise qword in 64-bit code and word
                  in 16-bit code.  */
6715           if (flag_code == CODE_32BIT)
6716             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
6717           else if (i.prefix[ADDR_PREFIX])
6718             need = need_dword;
6719           else
6720             need = flag_code == CODE_64BIT ? need_qword : need_word;
6721
6722           for (op = 0; op < i.operands; op++)
6723             {
6724               if (i.types[op].bitfield.class != Reg)
6725                 continue;
6726
                   /* `continue' accepts the operand; leaving the switch
                      through `break' reaches the diagnostic below.  */
6727               switch (need)
6728                 {
6729                 case need_word:
6730                   if (i.op[op].regs->reg_type.bitfield.word)
6731                     continue;
6732                   break;
6733                 case need_dword:
6734                   if (i.op[op].regs->reg_type.bitfield.dword)
6735                     continue;
6736                   break;
6737                 case need_qword:
6738                   if (i.op[op].regs->reg_type.bitfield.qword)
6739                     continue;
6740                   break;
6741                 }
6742
6743               as_bad (_("invalid register operand size for `%s'"),
6744                       i.tm.name);
6745               return 0;
6746             }
6747         }
6748     }
6749
6750   return 1;
6751 }
6752
6753 static int
6754 check_byte_reg (void)
6755 {
6756   int op;
6757
6758   for (op = i.operands; --op >= 0;)
6759     {
6760       /* Skip non-register operands. */
6761       if (i.types[op].bitfield.class != Reg)
6762         continue;
6763
6764       /* If this is an eight bit register, it's OK.  If it's the 16 or
6765          32 bit version of an eight bit register, we will just use the
6766          low portion, and that's OK too.  */
6767       if (i.types[op].bitfield.byte)
6768         continue;
6769
6770       /* I/O port address operands are OK too.  */
6771       if (i.tm.operand_types[op].bitfield.instance == RegD
6772           && i.tm.operand_types[op].bitfield.word)
6773         continue;
6774
6775       /* crc32 only wants its source operand checked here.  */
6776       if (i.tm.base_opcode == 0xf20f38f0 && op)
6777         continue;
6778
6779       /* Any other register is bad.  */
6780       if (i.types[op].bitfield.class == Reg
6781           || i.types[op].bitfield.class == RegMMX
6782           || i.types[op].bitfield.class == RegSIMD
6783           || i.types[op].bitfield.class == SReg
6784           || i.types[op].bitfield.class == RegCR
6785           || i.types[op].bitfield.class == RegDR
6786           || i.types[op].bitfield.class == RegTR)
6787         {
6788           as_bad (_("`%s%s' not allowed with `%s%c'"),
6789                   register_prefix,
6790                   i.op[op].regs->reg_name,
6791                   i.tm.name,
6792                   i.suffix);
6793           return 0;
6794         }
6795     }
6796   return 1;
6797 }
6798
6799 static int
6800 check_long_reg (void)
6801 {
6802   int op;
6803
6804   for (op = i.operands; --op >= 0;)
6805     /* Skip non-register operands. */
6806     if (i.types[op].bitfield.class != Reg)
6807       continue;
6808     /* Reject eight bit registers, except where the template requires
6809        them. (eg. movzb)  */
6810     else if (i.types[op].bitfield.byte
6811              && (i.tm.operand_types[op].bitfield.class == Reg
6812                  || i.tm.operand_types[op].bitfield.instance == Accum)
6813              && (i.tm.operand_types[op].bitfield.word
6814                  || i.tm.operand_types[op].bitfield.dword))
6815       {
6816         as_bad (_("`%s%s' not allowed with `%s%c'"),
6817                 register_prefix,
6818                 i.op[op].regs->reg_name,
6819                 i.tm.name,
6820                 i.suffix);
6821         return 0;
6822       }
6823     /* Error if the e prefix on a general reg is missing.  */
6824     else if (i.types[op].bitfield.word
6825              && (i.tm.operand_types[op].bitfield.class == Reg
6826                  || i.tm.operand_types[op].bitfield.instance == Accum)
6827              && i.tm.operand_types[op].bitfield.dword)
6828       {
6829         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6830                 register_prefix, i.op[op].regs->reg_name,
6831                 i.suffix);
6832         return 0;
6833       }
6834     /* Warn if the r prefix on a general reg is present.  */
6835     else if (i.types[op].bitfield.qword
6836              && (i.tm.operand_types[op].bitfield.class == Reg
6837                  || i.tm.operand_types[op].bitfield.instance == Accum)
6838              && i.tm.operand_types[op].bitfield.dword)
6839       {
6840         if (intel_syntax
6841             && i.tm.opcode_modifier.toqword
6842             && i.types[0].bitfield.class != RegSIMD)
6843           {
6844             /* Convert to QWORD.  We want REX byte. */
6845             i.suffix = QWORD_MNEM_SUFFIX;
6846           }
6847         else
6848           {
6849             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6850                     register_prefix, i.op[op].regs->reg_name,
6851                     i.suffix);
6852             return 0;
6853           }
6854       }
6855   return 1;
6856 }
6857
6858 static int
6859 check_qword_reg (void)
6860 {
6861   int op;
6862
6863   for (op = i.operands; --op >= 0; )
6864     /* Skip non-register operands. */
6865     if (i.types[op].bitfield.class != Reg)
6866       continue;
6867     /* Reject eight bit registers, except where the template requires
6868        them. (eg. movzb)  */
6869     else if (i.types[op].bitfield.byte
6870              && (i.tm.operand_types[op].bitfield.class == Reg
6871                  || i.tm.operand_types[op].bitfield.instance == Accum)
6872              && (i.tm.operand_types[op].bitfield.word
6873                  || i.tm.operand_types[op].bitfield.dword))
6874       {
6875         as_bad (_("`%s%s' not allowed with `%s%c'"),
6876                 register_prefix,
6877                 i.op[op].regs->reg_name,
6878                 i.tm.name,
6879                 i.suffix);
6880         return 0;
6881       }
6882     /* Warn if the r prefix on a general reg is missing.  */
6883     else if ((i.types[op].bitfield.word
6884               || i.types[op].bitfield.dword)
6885              && (i.tm.operand_types[op].bitfield.class == Reg
6886                  || i.tm.operand_types[op].bitfield.instance == Accum)
6887              && i.tm.operand_types[op].bitfield.qword)
6888       {
6889         /* Prohibit these changes in the 64bit mode, since the
6890            lowering is more complicated.  */
6891         if (intel_syntax
6892             && i.tm.opcode_modifier.todword
6893             && i.types[0].bitfield.class != RegSIMD)
6894           {
6895             /* Convert to DWORD.  We don't want REX byte. */
6896             i.suffix = LONG_MNEM_SUFFIX;
6897           }
6898         else
6899           {
6900             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6901                     register_prefix, i.op[op].regs->reg_name,
6902                     i.suffix);
6903             return 0;
6904           }
6905       }
6906   return 1;
6907 }
6908
6909 static int
6910 check_word_reg (void)
6911 {
6912   int op;
6913   for (op = i.operands; --op >= 0;)
6914     /* Skip non-register operands. */
6915     if (i.types[op].bitfield.class != Reg)
6916       continue;
6917     /* Reject eight bit registers, except where the template requires
6918        them. (eg. movzb)  */
6919     else if (i.types[op].bitfield.byte
6920              && (i.tm.operand_types[op].bitfield.class == Reg
6921                  || i.tm.operand_types[op].bitfield.instance == Accum)
6922              && (i.tm.operand_types[op].bitfield.word
6923                  || i.tm.operand_types[op].bitfield.dword))
6924       {
6925         as_bad (_("`%s%s' not allowed with `%s%c'"),
6926                 register_prefix,
6927                 i.op[op].regs->reg_name,
6928                 i.tm.name,
6929                 i.suffix);
6930         return 0;
6931       }
6932     /* Error if the e or r prefix on a general reg is present.  */
6933     else if ((i.types[op].bitfield.dword
6934                  || i.types[op].bitfield.qword)
6935              && (i.tm.operand_types[op].bitfield.class == Reg
6936                  || i.tm.operand_types[op].bitfield.instance == Accum)
6937              && i.tm.operand_types[op].bitfield.word)
6938       {
6939         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6940                 register_prefix, i.op[op].regs->reg_name,
6941                 i.suffix);
6942         return 0;
6943       }
6944   return 1;
6945 }
6946
6947 static int
6948 update_imm (unsigned int j)
6949 {
6950   i386_operand_type overlap = i.types[j];
6951   if ((overlap.bitfield.imm8
6952        || overlap.bitfield.imm8s
6953        || overlap.bitfield.imm16
6954        || overlap.bitfield.imm32
6955        || overlap.bitfield.imm32s
6956        || overlap.bitfield.imm64)
6957       && !operand_type_equal (&overlap, &imm8)
6958       && !operand_type_equal (&overlap, &imm8s)
6959       && !operand_type_equal (&overlap, &imm16)
6960       && !operand_type_equal (&overlap, &imm32)
6961       && !operand_type_equal (&overlap, &imm32s)
6962       && !operand_type_equal (&overlap, &imm64))
6963     {
6964       if (i.suffix)
6965         {
6966           i386_operand_type temp;
6967
6968           operand_type_set (&temp, 0);
6969           if (i.suffix == BYTE_MNEM_SUFFIX)
6970             {
6971               temp.bitfield.imm8 = overlap.bitfield.imm8;
6972               temp.bitfield.imm8s = overlap.bitfield.imm8s;
6973             }
6974           else if (i.suffix == WORD_MNEM_SUFFIX)
6975             temp.bitfield.imm16 = overlap.bitfield.imm16;
6976           else if (i.suffix == QWORD_MNEM_SUFFIX)
6977             {
6978               temp.bitfield.imm64 = overlap.bitfield.imm64;
6979               temp.bitfield.imm32s = overlap.bitfield.imm32s;
6980             }
6981           else
6982             temp.bitfield.imm32 = overlap.bitfield.imm32;
6983           overlap = temp;
6984         }
6985       else if (operand_type_equal (&overlap, &imm16_32_32s)
6986                || operand_type_equal (&overlap, &imm16_32)
6987                || operand_type_equal (&overlap, &imm16_32s))
6988         {
6989           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6990             overlap = imm16;
6991           else
6992             overlap = imm32s;
6993         }
6994       if (!operand_type_equal (&overlap, &imm8)
6995           && !operand_type_equal (&overlap, &imm8s)
6996           && !operand_type_equal (&overlap, &imm16)
6997           && !operand_type_equal (&overlap, &imm32)
6998           && !operand_type_equal (&overlap, &imm32s)
6999           && !operand_type_equal (&overlap, &imm64))
7000         {
7001           as_bad (_("no instruction mnemonic suffix given; "
7002                     "can't determine immediate size"));
7003           return 0;
7004         }
7005     }
7006   i.types[j] = overlap;
7007
7008   return 1;
7009 }
7010
7011 static int
7012 finalize_imm (void)
7013 {
7014   unsigned int j, n;
7015
7016   /* Update the first 2 immediate operands.  */
7017   n = i.operands > 2 ? 2 : i.operands;
7018   if (n)
7019     {
7020       for (j = 0; j < n; j++)
7021         if (update_imm (j) == 0)
7022           return 0;
7023
7024       /* The 3rd operand can't be immediate operand.  */
7025       gas_assert (operand_type_check (i.types[2], imm) == 0);
7026     }
7027
7028   return 1;
7029 }
7030
7031 static int
7032 process_operands (void)
7033 {
7034   /* Default segment register this instruction will use for memory
7035      accesses.  0 means unknown.  This is only for optimizing out
7036      unnecessary segment overrides.  */
7037   const seg_entry *default_seg = 0;
7038
7039   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7040     {
7041       unsigned int dupl = i.operands;
7042       unsigned int dest = dupl - 1;
7043       unsigned int j;
7044
7045       /* The destination must be an xmm register.  */
7046       gas_assert (i.reg_operands
7047                   && MAX_OPERANDS > dupl
7048                   && operand_type_equal (&i.types[dest], &regxmm));
7049
7050       if (i.tm.operand_types[0].bitfield.instance == Accum
7051           && i.tm.operand_types[0].bitfield.xmmword)
7052         {
7053           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7054             {
7055               /* Keep xmm0 for instructions with VEX prefix and 3
7056                  sources.  */
7057               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7058               i.tm.operand_types[0].bitfield.class = RegSIMD;
7059               goto duplicate;
7060             }
7061           else
7062             {
7063               /* We remove the first xmm0 and keep the number of
7064                  operands unchanged, which in fact duplicates the
7065                  destination.  */
7066               for (j = 1; j < i.operands; j++)
7067                 {
7068                   i.op[j - 1] = i.op[j];
7069                   i.types[j - 1] = i.types[j];
7070                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7071                   i.flags[j - 1] = i.flags[j];
7072                 }
7073             }
7074         }
7075       else if (i.tm.opcode_modifier.implicit1stxmm0)
7076         {
7077           gas_assert ((MAX_OPERANDS - 1) > dupl
7078                       && (i.tm.opcode_modifier.vexsources
7079                           == VEX3SOURCES));
7080
7081           /* Add the implicit xmm0 for instructions with VEX prefix
7082              and 3 sources.  */
7083           for (j = i.operands; j > 0; j--)
7084             {
7085               i.op[j] = i.op[j - 1];
7086               i.types[j] = i.types[j - 1];
7087               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7088               i.flags[j] = i.flags[j - 1];
7089             }
7090           i.op[0].regs
7091             = (const reg_entry *) hash_find (reg_hash, "xmm0");
7092           i.types[0] = regxmm;
7093           i.tm.operand_types[0] = regxmm;
7094
7095           i.operands += 2;
7096           i.reg_operands += 2;
7097           i.tm.operands += 2;
7098
7099           dupl++;
7100           dest++;
7101           i.op[dupl] = i.op[dest];
7102           i.types[dupl] = i.types[dest];
7103           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7104           i.flags[dupl] = i.flags[dest];
7105         }
7106       else
7107         {
7108         duplicate:
7109           i.operands++;
7110           i.reg_operands++;
7111           i.tm.operands++;
7112
7113           i.op[dupl] = i.op[dest];
7114           i.types[dupl] = i.types[dest];
7115           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7116           i.flags[dupl] = i.flags[dest];
7117         }
7118
7119        if (i.tm.opcode_modifier.immext)
7120          process_immext ();
7121     }
7122   else if (i.tm.operand_types[0].bitfield.instance == Accum
7123            && i.tm.operand_types[0].bitfield.xmmword)
7124     {
7125       unsigned int j;
7126
7127       for (j = 1; j < i.operands; j++)
7128         {
7129           i.op[j - 1] = i.op[j];
7130           i.types[j - 1] = i.types[j];
7131
7132           /* We need to adjust fields in i.tm since they are used by
7133              build_modrm_byte.  */
7134           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7135
7136           i.flags[j - 1] = i.flags[j];
7137         }
7138
7139       i.operands--;
7140       i.reg_operands--;
7141       i.tm.operands--;
7142     }
7143   else if (i.tm.opcode_modifier.implicitquadgroup)
7144     {
7145       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7146
7147       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7148       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7149       regnum = register_number (i.op[1].regs);
7150       first_reg_in_group = regnum & ~3;
7151       last_reg_in_group = first_reg_in_group + 3;
7152       if (regnum != first_reg_in_group)
7153         as_warn (_("source register `%s%s' implicitly denotes"
7154                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7155                  register_prefix, i.op[1].regs->reg_name,
7156                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7157                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7158                  i.tm.name);
7159     }
7160   else if (i.tm.opcode_modifier.regkludge)
7161     {
7162       /* The imul $imm, %reg instruction is converted into
7163          imul $imm, %reg, %reg, and the clr %reg instruction
7164          is converted into xor %reg, %reg.  */
7165
7166       unsigned int first_reg_op;
7167
7168       if (operand_type_check (i.types[0], reg))
7169         first_reg_op = 0;
7170       else
7171         first_reg_op = 1;
7172       /* Pretend we saw the extra register operand.  */
7173       gas_assert (i.reg_operands == 1
7174                   && i.op[first_reg_op + 1].regs == 0);
7175       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7176       i.types[first_reg_op + 1] = i.types[first_reg_op];
7177       i.operands++;
7178       i.reg_operands++;
7179     }
7180
7181   if (i.tm.opcode_modifier.modrm)
7182     {
7183       /* The opcode is completed (modulo i.tm.extension_opcode which
7184          must be put into the modrm byte).  Now, we make the modrm and
7185          index base bytes based on all the info we've collected.  */
7186
7187       default_seg = build_modrm_byte ();
7188     }
7189   else if (i.types[0].bitfield.class == SReg)
7190     {
7191       if (flag_code != CODE_64BIT
7192           ? i.tm.base_opcode == POP_SEG_SHORT
7193             && i.op[0].regs->reg_num == 1
7194           : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
7195             && i.op[0].regs->reg_num < 4)
7196         {
7197           as_bad (_("you can't `%s %s%s'"),
7198                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7199           return 0;
7200         }
7201       if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 )
7202         {
7203           i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
7204           i.tm.opcode_length = 2;
7205         }
7206       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7207     }
7208   else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
7209     {
7210       default_seg = &ds;
7211     }
7212   else if (i.tm.opcode_modifier.isstring)
7213     {
7214       /* For the string instructions that allow a segment override
7215          on one of their operands, the default segment is ds.  */
7216       default_seg = &ds;
7217     }
7218   else if (i.short_form)
7219     {
7220       /* The register or float register operand is in operand
7221          0 or 1.  */
7222       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7223
7224       /* Register goes in low 3 bits of opcode.  */
7225       i.tm.base_opcode |= i.op[op].regs->reg_num;
7226       if ((i.op[op].regs->reg_flags & RegRex) != 0)
7227         i.rex |= REX_B;
7228       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7229         {
7230           /* Warn about some common errors, but press on regardless.
7231              The first case can be generated by gcc (<= 2.8.1).  */
7232           if (i.operands == 2)
7233             {
7234               /* Reversed arguments on faddp, fsubp, etc.  */
7235               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7236                        register_prefix, i.op[!intel_syntax].regs->reg_name,
7237                        register_prefix, i.op[intel_syntax].regs->reg_name);
7238             }
7239           else
7240             {
7241               /* Extraneous `l' suffix on fp insn.  */
7242               as_warn (_("translating to `%s %s%s'"), i.tm.name,
7243                        register_prefix, i.op[0].regs->reg_name);
7244             }
7245         }
7246     }
7247
7248   if ((i.seg[0] || i.prefix[SEG_PREFIX])
7249       && i.tm.base_opcode == 0x8d /* lea */
7250       && !is_any_vex_encoding(&i.tm))
7251     {
7252       if (!quiet_warnings)
7253         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7254       if (optimize)
7255         {
7256           i.seg[0] = NULL;
7257           i.prefix[SEG_PREFIX] = 0;
7258         }
7259     }
7260
7261   /* If a segment was explicitly specified, and the specified segment
7262      is neither the default nor the one already recorded from a prefix,
7263      use an opcode prefix to select it.  If we never figured out what
7264      the default segment is, then default_seg will be zero at this
7265      point, and the specified segment prefix will always be used.  */
7266   if (i.seg[0]
7267       && i.seg[0] != default_seg
7268       && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
7269     {
7270       if (!add_prefix (i.seg[0]->seg_prefix))
7271         return 0;
7272     }
7273   return 1;
7274 }
7275
7276 static const seg_entry *
7277 build_modrm_byte (void)
7278 {
7279   const seg_entry *default_seg = 0;
7280   unsigned int source, dest;
7281   int vex_3_sources;
7282
7283   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
7284   if (vex_3_sources)
7285     {
7286       unsigned int nds, reg_slot;
7287       expressionS *exp;
7288
7289       dest = i.operands - 1;
7290       nds = dest - 1;
7291
7292       /* There are 2 kinds of instructions:
7293          1. 5 operands: 4 register operands or 3 register operands
7294          plus 1 memory operand plus one Imm4 operand, VexXDS, and
7295          VexW0 or VexW1.  The destination must be either XMM, YMM or
7296          ZMM register.
7297          2. 4 operands: 4 register operands or 3 register operands
7298          plus 1 memory operand, with VexXDS.  */
7299       gas_assert ((i.reg_operands == 4
7300                    || (i.reg_operands == 3 && i.mem_operands == 1))
7301                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
7302                   && i.tm.opcode_modifier.vexw
7303                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
7304
7305       /* If VexW1 is set, the first non-immediate operand is the source and
7306          the second non-immediate one is encoded in the immediate operand.  */
7307       if (i.tm.opcode_modifier.vexw == VEXW1)
7308         {
7309           source = i.imm_operands;
7310           reg_slot = i.imm_operands + 1;
7311         }
7312       else
7313         {
7314           source = i.imm_operands + 1;
7315           reg_slot = i.imm_operands;
7316         }
7317
7318       if (i.imm_operands == 0)
7319         {
7320           /* When there is no immediate operand, generate an 8bit
7321              immediate operand to encode the first operand.  */
7322           exp = &im_expressions[i.imm_operands++];
7323           i.op[i.operands].imms = exp;
7324           i.types[i.operands] = imm8;
7325           i.operands++;
7326
7327           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7328           exp->X_op = O_constant;
7329           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
7330           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7331         }
7332       else
7333         {
7334           gas_assert (i.imm_operands == 1);
7335           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
7336           gas_assert (!i.tm.opcode_modifier.immext);
7337
7338           /* Turn on Imm8 again so that output_imm will generate it.  */
7339           i.types[0].bitfield.imm8 = 1;
7340
7341           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7342           i.op[0].imms->X_add_number
7343               |= register_number (i.op[reg_slot].regs) << 4;
7344           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7345         }
7346
7347       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
7348       i.vex.register_specifier = i.op[nds].regs;
7349     }
7350   else
7351     source = dest = 0;
7352
7353   /* i.reg_operands MUST be the number of real register operands;
7354      implicit registers do not count.  If there are 3 register
7355      operands, it must be a instruction with VexNDS.  For a
7356      instruction with VexNDD, the destination register is encoded
7357      in VEX prefix.  If there are 4 register operands, it must be
7358      a instruction with VEX prefix and 3 sources.  */
7359   if (i.mem_operands == 0
7360       && ((i.reg_operands == 2
7361            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
7362           || (i.reg_operands == 3
7363               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
7364           || (i.reg_operands == 4 && vex_3_sources)))
7365     {
7366       switch (i.operands)
7367         {
7368         case 2:
7369           source = 0;
7370           break;
7371         case 3:
7372           /* When there are 3 operands, one of them may be immediate,
7373              which may be the first or the last operand.  Otherwise,
7374              the first operand must be shift count register (cl) or it
7375              is an instruction with VexNDS. */
7376           gas_assert (i.imm_operands == 1
7377                       || (i.imm_operands == 0
7378                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
7379                               || (i.types[0].bitfield.instance == RegC
7380                                   && i.types[0].bitfield.byte))));
7381           if (operand_type_check (i.types[0], imm)
7382               || (i.types[0].bitfield.instance == RegC
7383                   && i.types[0].bitfield.byte))
7384             source = 1;
7385           else
7386             source = 0;
7387           break;
7388         case 4:
7389           /* When there are 4 operands, the first two must be 8bit
7390              immediate operands. The source operand will be the 3rd
7391              one.
7392
7393              For instructions with VexNDS, if the first operand
7394              an imm8, the source operand is the 2nd one.  If the last
7395              operand is imm8, the source operand is the first one.  */
7396           gas_assert ((i.imm_operands == 2
7397                        && i.types[0].bitfield.imm8
7398                        && i.types[1].bitfield.imm8)
7399                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
7400                           && i.imm_operands == 1
7401                           && (i.types[0].bitfield.imm8
7402                               || i.types[i.operands - 1].bitfield.imm8
7403                               || i.rounding)));
7404           if (i.imm_operands == 2)
7405             source = 2;
7406           else
7407             {
7408               if (i.types[0].bitfield.imm8)
7409                 source = 1;
7410               else
7411                 source = 0;
7412             }
7413           break;
7414         case 5:
7415           if (is_evex_encoding (&i.tm))
7416             {
7417               /* For EVEX instructions, when there are 5 operands, the
7418                  first one must be immediate operand.  If the second one
7419                  is immediate operand, the source operand is the 3th
7420                  one.  If the last one is immediate operand, the source
7421                  operand is the 2nd one.  */
7422               gas_assert (i.imm_operands == 2
7423                           && i.tm.opcode_modifier.sae
7424                           && operand_type_check (i.types[0], imm));
7425               if (operand_type_check (i.types[1], imm))
7426                 source = 2;
7427               else if (operand_type_check (i.types[4], imm))
7428                 source = 1;
7429               else
7430                 abort ();
7431             }
7432           break;
7433         default:
7434           abort ();
7435         }
7436
7437       if (!vex_3_sources)
7438         {
7439           dest = source + 1;
7440
7441           /* RC/SAE operand could be between DEST and SRC.  That happens
7442              when one operand is GPR and the other one is XMM/YMM/ZMM
7443              register.  */
7444           if (i.rounding && i.rounding->operand == (int) dest)
7445             dest++;
7446
7447           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7448             {
7449               /* For instructions with VexNDS, the register-only source
7450                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
7451                  register.  It is encoded in VEX prefix.  */
7452
7453               i386_operand_type op;
7454               unsigned int vvvv;
7455
7456               /* Check register-only source operand when two source
7457                  operands are swapped.  */
7458               if (!i.tm.operand_types[source].bitfield.baseindex
7459                   && i.tm.operand_types[dest].bitfield.baseindex)
7460                 {
7461                   vvvv = source;
7462                   source = dest;
7463                 }
7464               else
7465                 vvvv = dest;
7466
7467               op = i.tm.operand_types[vvvv];
7468               if ((dest + 1) >= i.operands
7469                   || ((op.bitfield.class != Reg
7470                        || (!op.bitfield.dword && !op.bitfield.qword))
7471                       && op.bitfield.class != RegSIMD
7472                       && !operand_type_equal (&op, &regmask)))
7473                 abort ();
7474               i.vex.register_specifier = i.op[vvvv].regs;
7475               dest++;
7476             }
7477         }
7478
7479       i.rm.mode = 3;
7480       /* One of the register operands will be encoded in the i.rm.reg
7481          field, the other in the combined i.rm.mode and i.rm.regmem
7482          fields.  If no form of this instruction supports a memory
7483          destination operand, then we assume the source operand may
7484          sometimes be a memory operand and so we need to store the
7485          destination in the i.rm.reg field.  */
7486       if (!i.tm.opcode_modifier.regmem
7487           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
7488         {
7489           i.rm.reg = i.op[dest].regs->reg_num;
7490           i.rm.regmem = i.op[source].regs->reg_num;
7491           if (i.op[dest].regs->reg_type.bitfield.class == RegMMX
7492                || i.op[source].regs->reg_type.bitfield.class == RegMMX)
7493             i.has_regmmx = TRUE;
7494           else if (i.op[dest].regs->reg_type.bitfield.class == RegSIMD
7495                    || i.op[source].regs->reg_type.bitfield.class == RegSIMD)
7496             {
7497               if (i.types[dest].bitfield.zmmword
7498                   || i.types[source].bitfield.zmmword)
7499                 i.has_regzmm = TRUE;
7500               else if (i.types[dest].bitfield.ymmword
7501                        || i.types[source].bitfield.ymmword)
7502                 i.has_regymm = TRUE;
7503               else
7504                 i.has_regxmm = TRUE;
7505             }
7506           if ((i.op[dest].regs->reg_flags & RegRex) != 0)
7507             i.rex |= REX_R;
7508           if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
7509             i.vrex |= REX_R;
7510           if ((i.op[source].regs->reg_flags & RegRex) != 0)
7511             i.rex |= REX_B;
7512           if ((i.op[source].regs->reg_flags & RegVRex) != 0)
7513             i.vrex |= REX_B;
7514         }
7515       else
7516         {
7517           i.rm.reg = i.op[source].regs->reg_num;
7518           i.rm.regmem = i.op[dest].regs->reg_num;
7519           if ((i.op[dest].regs->reg_flags & RegRex) != 0)
7520             i.rex |= REX_B;
7521           if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
7522             i.vrex |= REX_B;
7523           if ((i.op[source].regs->reg_flags & RegRex) != 0)
7524             i.rex |= REX_R;
7525           if ((i.op[source].regs->reg_flags & RegVRex) != 0)
7526             i.vrex |= REX_R;
7527         }
7528       if (flag_code != CODE_64BIT && (i.rex & REX_R))
7529         {
7530           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
7531             abort ();
7532           i.rex &= ~REX_R;
7533           add_prefix (LOCK_PREFIX_OPCODE);
7534         }
7535     }
7536   else
7537     {                   /* If it's not 2 reg operands...  */
7538       unsigned int mem;
7539
7540       if (i.mem_operands)
7541         {
7542           unsigned int fake_zero_displacement = 0;
7543           unsigned int op;
7544
7545           for (op = 0; op < i.operands; op++)
7546             if (i.flags[op] & Operand_Mem)
7547               break;
7548           gas_assert (op < i.operands);
7549
7550           if (i.tm.opcode_modifier.vecsib)
7551             {
7552               if (i.index_reg->reg_num == RegIZ)
7553                 abort ();
7554
7555               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7556               if (!i.base_reg)
7557                 {
7558                   i.sib.base = NO_BASE_REGISTER;
7559                   i.sib.scale = i.log2_scale_factor;
7560                   i.types[op].bitfield.disp8 = 0;
7561                   i.types[op].bitfield.disp16 = 0;
7562                   i.types[op].bitfield.disp64 = 0;
7563                   if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
7564                     {
7565                       /* Must be 32 bit */
7566                       i.types[op].bitfield.disp32 = 1;
7567                       i.types[op].bitfield.disp32s = 0;
7568                     }
7569                   else
7570                     {
7571                       i.types[op].bitfield.disp32 = 0;
7572                       i.types[op].bitfield.disp32s = 1;
7573                     }
7574                 }
7575               i.sib.index = i.index_reg->reg_num;
7576               if ((i.index_reg->reg_flags & RegRex) != 0)
7577                 i.rex |= REX_X;
7578               if ((i.index_reg->reg_flags & RegVRex) != 0)
7579                 i.vrex |= REX_X;
7580             }
7581
7582           default_seg = &ds;
7583
7584           if (i.base_reg == 0)
7585             {
7586               i.rm.mode = 0;
7587               if (!i.disp_operands)
7588                 fake_zero_displacement = 1;
7589               if (i.index_reg == 0)
7590                 {
7591                   i386_operand_type newdisp;
7592
7593                   gas_assert (!i.tm.opcode_modifier.vecsib);
7594                   /* Operand is just <disp>  */
7595                   if (flag_code == CODE_64BIT)
7596                     {
7597                       /* 64bit mode overwrites the 32bit absolute
7598                          addressing by RIP relative addressing and
7599                          absolute addressing is encoded by one of the
7600                          redundant SIB forms.  */
7601                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7602                       i.sib.base = NO_BASE_REGISTER;
7603                       i.sib.index = NO_INDEX_REGISTER;
7604                       newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
7605                     }
7606                   else if ((flag_code == CODE_16BIT)
7607                            ^ (i.prefix[ADDR_PREFIX] != 0))
7608                     {
7609                       i.rm.regmem = NO_BASE_REGISTER_16;
7610                       newdisp = disp16;
7611                     }
7612                   else
7613                     {
7614                       i.rm.regmem = NO_BASE_REGISTER;
7615                       newdisp = disp32;
7616                     }
7617                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
7618                   i.types[op] = operand_type_or (i.types[op], newdisp);
7619                 }
7620               else if (!i.tm.opcode_modifier.vecsib)
7621                 {
7622                   /* !i.base_reg && i.index_reg  */
7623                   if (i.index_reg->reg_num == RegIZ)
7624                     i.sib.index = NO_INDEX_REGISTER;
7625                   else
7626                     i.sib.index = i.index_reg->reg_num;
7627                   i.sib.base = NO_BASE_REGISTER;
7628                   i.sib.scale = i.log2_scale_factor;
7629                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7630                   i.types[op].bitfield.disp8 = 0;
7631                   i.types[op].bitfield.disp16 = 0;
7632                   i.types[op].bitfield.disp64 = 0;
7633                   if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
7634                     {
7635                       /* Must be 32 bit */
7636                       i.types[op].bitfield.disp32 = 1;
7637                       i.types[op].bitfield.disp32s = 0;
7638                     }
7639                   else
7640                     {
7641                       i.types[op].bitfield.disp32 = 0;
7642                       i.types[op].bitfield.disp32s = 1;
7643                     }
7644                   if ((i.index_reg->reg_flags & RegRex) != 0)
7645                     i.rex |= REX_X;
7646                 }
7647             }
7648           /* RIP addressing for 64bit mode.  */
7649           else if (i.base_reg->reg_num == RegIP)
7650             {
7651               gas_assert (!i.tm.opcode_modifier.vecsib);
7652               i.rm.regmem = NO_BASE_REGISTER;
7653               i.types[op].bitfield.disp8 = 0;
7654               i.types[op].bitfield.disp16 = 0;
7655               i.types[op].bitfield.disp32 = 0;
7656               i.types[op].bitfield.disp32s = 1;
7657               i.types[op].bitfield.disp64 = 0;
7658               i.flags[op] |= Operand_PCrel;
7659               if (! i.disp_operands)
7660                 fake_zero_displacement = 1;
7661             }
7662           else if (i.base_reg->reg_type.bitfield.word)
7663             {
7664               gas_assert (!i.tm.opcode_modifier.vecsib);
7665               switch (i.base_reg->reg_num)
7666                 {
7667                 case 3: /* (%bx)  */
7668                   if (i.index_reg == 0)
7669                     i.rm.regmem = 7;
7670                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
7671                     i.rm.regmem = i.index_reg->reg_num - 6;
7672                   break;
7673                 case 5: /* (%bp)  */
7674                   default_seg = &ss;
7675                   if (i.index_reg == 0)
7676                     {
7677                       i.rm.regmem = 6;
7678                       if (operand_type_check (i.types[op], disp) == 0)
7679                         {
7680                           /* fake (%bp) into 0(%bp)  */
7681                           i.types[op].bitfield.disp8 = 1;
7682                           fake_zero_displacement = 1;
7683                         }
7684                     }
7685                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
7686                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
7687                   break;
7688                 default: /* (%si) -> 4 or (%di) -> 5  */
7689                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
7690                 }
7691               i.rm.mode = mode_from_disp_size (i.types[op]);
7692             }
7693           else /* i.base_reg and 32/64 bit mode  */
7694             {
7695               if (flag_code == CODE_64BIT
7696                   && operand_type_check (i.types[op], disp))
7697                 {
7698                   i.types[op].bitfield.disp16 = 0;
7699                   i.types[op].bitfield.disp64 = 0;
7700                   if (i.prefix[ADDR_PREFIX] == 0)
7701                     {
7702                       i.types[op].bitfield.disp32 = 0;
7703                       i.types[op].bitfield.disp32s = 1;
7704                     }
7705                   else
7706                     {
7707                       i.types[op].bitfield.disp32 = 1;
7708                       i.types[op].bitfield.disp32s = 0;
7709                     }
7710                 }
7711
7712               if (!i.tm.opcode_modifier.vecsib)
7713                 i.rm.regmem = i.base_reg->reg_num;
7714               if ((i.base_reg->reg_flags & RegRex) != 0)
7715                 i.rex |= REX_B;
7716               i.sib.base = i.base_reg->reg_num;
7717               /* x86-64 ignores REX prefix bit here to avoid decoder
7718                  complications.  */
7719               if (!(i.base_reg->reg_flags & RegRex)
7720                   && (i.base_reg->reg_num == EBP_REG_NUM
7721                    || i.base_reg->reg_num == ESP_REG_NUM))
7722                   default_seg = &ss;
7723               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
7724                 {
7725                   fake_zero_displacement = 1;
7726                   i.types[op].bitfield.disp8 = 1;
7727                 }
7728               i.sib.scale = i.log2_scale_factor;
7729               if (i.index_reg == 0)
7730                 {
7731                   gas_assert (!i.tm.opcode_modifier.vecsib);
7732                   /* <disp>(%esp) becomes two byte modrm with no index
7733                      register.  We've already stored the code for esp
7734                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
7735                      Any base register besides %esp will not use the
7736                      extra modrm byte.  */
7737                   i.sib.index = NO_INDEX_REGISTER;
7738                 }
7739               else if (!i.tm.opcode_modifier.vecsib)
7740                 {
7741                   if (i.index_reg->reg_num == RegIZ)
7742                     i.sib.index = NO_INDEX_REGISTER;
7743                   else
7744                     i.sib.index = i.index_reg->reg_num;
7745                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7746                   if ((i.index_reg->reg_flags & RegRex) != 0)
7747                     i.rex |= REX_X;
7748                 }
7749
7750               if (i.disp_operands
7751                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
7752                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
7753                 i.rm.mode = 0;
7754               else
7755                 {
7756                   if (!fake_zero_displacement
7757                       && !i.disp_operands
7758                       && i.disp_encoding)
7759                     {
7760                       fake_zero_displacement = 1;
7761                       if (i.disp_encoding == disp_encoding_8bit)
7762                         i.types[op].bitfield.disp8 = 1;
7763                       else
7764                         i.types[op].bitfield.disp32 = 1;
7765                     }
7766                   i.rm.mode = mode_from_disp_size (i.types[op]);
7767                 }
7768             }
7769
7770           if (fake_zero_displacement)
7771             {
7772               /* Fakes a zero displacement assuming that i.types[op]
7773                  holds the correct displacement size.  */
7774               expressionS *exp;
7775
7776               gas_assert (i.op[op].disps == 0);
7777               exp = &disp_expressions[i.disp_operands++];
7778               i.op[op].disps = exp;
7779               exp->X_op = O_constant;
7780               exp->X_add_number = 0;
7781               exp->X_add_symbol = (symbolS *) 0;
7782               exp->X_op_symbol = (symbolS *) 0;
7783             }
7784
7785           mem = op;
7786         }
7787       else
7788         mem = ~0;
7789
7790       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
7791         {
7792           if (operand_type_check (i.types[0], imm))
7793             i.vex.register_specifier = NULL;
7794           else
7795             {
7796               /* VEX.vvvv encodes one of the sources when the first
7797                  operand is not an immediate.  */
7798               if (i.tm.opcode_modifier.vexw == VEXW0)
7799                 i.vex.register_specifier = i.op[0].regs;
7800               else
7801                 i.vex.register_specifier = i.op[1].regs;
7802             }
7803
7804           /* Destination is a XMM register encoded in the ModRM.reg
7805              and VEX.R bit.  */
7806           i.rm.reg = i.op[2].regs->reg_num;
7807           if ((i.op[2].regs->reg_flags & RegRex) != 0)
7808             i.rex |= REX_R;
7809
7810           /* ModRM.rm and VEX.B encodes the other source.  */
7811           if (!i.mem_operands)
7812             {
7813               i.rm.mode = 3;
7814
7815               if (i.tm.opcode_modifier.vexw == VEXW0)
7816                 i.rm.regmem = i.op[1].regs->reg_num;
7817               else
7818                 i.rm.regmem = i.op[0].regs->reg_num;
7819
7820               if ((i.op[1].regs->reg_flags & RegRex) != 0)
7821                 i.rex |= REX_B;
7822             }
7823         }
7824       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
7825         {
7826           i.vex.register_specifier = i.op[2].regs;
7827           if (!i.mem_operands)
7828             {
7829               i.rm.mode = 3;
7830               i.rm.regmem = i.op[1].regs->reg_num;
7831               if ((i.op[1].regs->reg_flags & RegRex) != 0)
7832                 i.rex |= REX_B;
7833             }
7834         }
7835       /* Fill in i.rm.reg or i.rm.regmem field with register operand
7836          (if any) based on i.tm.extension_opcode.  Again, we must be
7837          careful to make sure that segment/control/debug/test/MMX
7838          registers are coded into the i.rm.reg field.  */
7839       else if (i.reg_operands)
7840         {
7841           unsigned int op;
7842           unsigned int vex_reg = ~0;
7843
7844           for (op = 0; op < i.operands; op++)
7845             {
7846               if (i.types[op].bitfield.class == Reg
7847                   || i.types[op].bitfield.class == RegBND
7848                   || i.types[op].bitfield.class == RegMask
7849                   || i.types[op].bitfield.class == SReg
7850                   || i.types[op].bitfield.class == RegCR
7851                   || i.types[op].bitfield.class == RegDR
7852                   || i.types[op].bitfield.class == RegTR)
7853                 break;
7854               if (i.types[op].bitfield.class == RegSIMD)
7855                 {
7856                   if (i.types[op].bitfield.zmmword)
7857                     i.has_regzmm = TRUE;
7858                   else if (i.types[op].bitfield.ymmword)
7859                     i.has_regymm = TRUE;
7860                   else
7861                     i.has_regxmm = TRUE;
7862                   break;
7863                 }
7864               if (i.types[op].bitfield.class == RegMMX)
7865                 {
7866                   i.has_regmmx = TRUE;
7867                   break;
7868                 }
7869             }
7870
7871           if (vex_3_sources)
7872             op = dest;
7873           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7874             {
7875               /* For instructions with VexNDS, the register-only
7876                  source operand is encoded in VEX prefix. */
7877               gas_assert (mem != (unsigned int) ~0);
7878
7879               if (op > mem)
7880                 {
7881                   vex_reg = op++;
7882                   gas_assert (op < i.operands);
7883                 }
7884               else
7885                 {
7886                   /* Check register-only source operand when two source
7887                      operands are swapped.  */
7888                   if (!i.tm.operand_types[op].bitfield.baseindex
7889                       && i.tm.operand_types[op + 1].bitfield.baseindex)
7890                     {
7891                       vex_reg = op;
7892                       op += 2;
7893                       gas_assert (mem == (vex_reg + 1)
7894                                   && op < i.operands);
7895                     }
7896                   else
7897                     {
7898                       vex_reg = op + 1;
7899                       gas_assert (vex_reg < i.operands);
7900                     }
7901                 }
7902             }
7903           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
7904             {
7905               /* For instructions with VexNDD, the register destination
7906                  is encoded in VEX prefix.  */
7907               if (i.mem_operands == 0)
7908                 {
7909                   /* There is no memory operand.  */
7910                   gas_assert ((op + 2) == i.operands);
7911                   vex_reg = op + 1;
7912                 }
7913               else
7914                 {
7915                   /* There are only 2 non-immediate operands.  */
7916                   gas_assert (op < i.imm_operands + 2
7917                               && i.operands == i.imm_operands + 2);
7918                   vex_reg = i.imm_operands + 1;
7919                 }
7920             }
7921           else
7922             gas_assert (op < i.operands);
7923
7924           if (vex_reg != (unsigned int) ~0)
7925             {
7926               i386_operand_type *type = &i.tm.operand_types[vex_reg];
7927
7928               if ((type->bitfield.class != Reg
7929                    || (!type->bitfield.dword && !type->bitfield.qword))
7930                   && type->bitfield.class != RegSIMD
7931                   && !operand_type_equal (type, &regmask))
7932                 abort ();
7933
7934               i.vex.register_specifier = i.op[vex_reg].regs;
7935             }
7936
7937           /* Don't set OP operand twice.  */
7938           if (vex_reg != op)
7939             {
7940               /* If there is an extension opcode to put here, the
7941                  register number must be put into the regmem field.  */
7942               if (i.tm.extension_opcode != None)
7943                 {
7944                   i.rm.regmem = i.op[op].regs->reg_num;
7945                   if ((i.op[op].regs->reg_flags & RegRex) != 0)
7946                     i.rex |= REX_B;
7947                   if ((i.op[op].regs->reg_flags & RegVRex) != 0)
7948                     i.vrex |= REX_B;
7949                 }
7950               else
7951                 {
7952                   i.rm.reg = i.op[op].regs->reg_num;
7953                   if ((i.op[op].regs->reg_flags & RegRex) != 0)
7954                     i.rex |= REX_R;
7955                   if ((i.op[op].regs->reg_flags & RegVRex) != 0)
7956                     i.vrex |= REX_R;
7957                 }
7958             }
7959
7960           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
7961              must set it to 3 to indicate this is a register operand
7962              in the regmem field.  */
7963           if (!i.mem_operands)
7964             i.rm.mode = 3;
7965         }
7966
7967       /* Fill in i.rm.reg field with extension opcode (if any).  */
7968       if (i.tm.extension_opcode != None)
7969         i.rm.reg = i.tm.extension_opcode;
7970     }
7971   return default_seg;
7972 }
7973
7974 static unsigned int
7975 flip_code16 (unsigned int code16)
7976 {
7977   gas_assert (i.tm.operands == 1);
7978
7979   return !(i.prefix[REX_PREFIX] & REX_W)
7980          && (code16 ? i.tm.operand_types[0].bitfield.disp32
7981                       || i.tm.operand_types[0].bitfield.disp32s
7982                     : i.tm.operand_types[0].bitfield.disp16)
7983          ? CODE16 : 0;
7984 }
7985
7986 static void
7987 output_branch (void)
7988 {
7989   char *p;
7990   int size;
7991   int code16;
7992   int prefix;
7993   relax_substateT subtype;
7994   symbolS *sym;
7995   offsetT off;
7996
7997   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
7998   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
7999
8000   prefix = 0;
8001   if (i.prefix[DATA_PREFIX] != 0)
8002     {
8003       prefix = 1;
8004       i.prefixes -= 1;
8005       code16 ^= flip_code16(code16);
8006     }
8007   /* Pentium4 branch hints.  */
8008   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8009       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8010     {
8011       prefix++;
8012       i.prefixes--;
8013     }
8014   if (i.prefix[REX_PREFIX] != 0)
8015     {
8016       prefix++;
8017       i.prefixes--;
8018     }
8019
8020   /* BND prefixed jump.  */
8021   if (i.prefix[BND_PREFIX] != 0)
8022     {
8023       prefix++;
8024       i.prefixes--;
8025     }
8026
8027   if (i.prefixes != 0)
8028     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8029
8030   /* It's always a symbol;  End frag & setup for relax.
8031      Make sure there is enough room in this frag for the largest
8032      instruction we may generate in md_convert_frag.  This is 2
8033      bytes for the opcode and room for the prefix and largest
8034      displacement.  */
8035   frag_grow (prefix + 2 + 4);
8036   /* Prefix and 1 opcode byte go in fr_fix.  */
8037   p = frag_more (prefix + 1);
8038   if (i.prefix[DATA_PREFIX] != 0)
8039     *p++ = DATA_PREFIX_OPCODE;
8040   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8041       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8042     *p++ = i.prefix[SEG_PREFIX];
8043   if (i.prefix[BND_PREFIX] != 0)
8044     *p++ = BND_PREFIX_OPCODE;
8045   if (i.prefix[REX_PREFIX] != 0)
8046     *p++ = i.prefix[REX_PREFIX];
8047   *p = i.tm.base_opcode;
8048
8049   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8050     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8051   else if (cpu_arch_flags.bitfield.cpui386)
8052     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8053   else
8054     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8055   subtype |= code16;
8056
8057   sym = i.op[0].disps->X_add_symbol;
8058   off = i.op[0].disps->X_add_number;
8059
8060   if (i.op[0].disps->X_op != O_constant
8061       && i.op[0].disps->X_op != O_symbol)
8062     {
8063       /* Handle complex expressions.  */
8064       sym = make_expr_symbol (i.op[0].disps);
8065       off = 0;
8066     }
8067
8068   /* 1 possible extra opcode + 4 byte displacement go in var part.
8069      Pass reloc in fr_var.  */
8070   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8071 }
8072
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Tell whether a branch to symbol S should carry a PLT32 relocation.
   Returns TRUE if so, FALSE otherwise.  */

static bfd_boolean
need_plt32_p (symbolS *s)
{
  /* Only ELF has the PLT32 relocation.  */
  if (!IS_ELF)
    return FALSE;

#ifdef TE_SOLARIS
  /* Neither the native Solaris linker nor krtld supports PLT32, so
     never emit it there.  */
  return FALSE;
#endif

  /* On x86-64 nothing has to be prepared for a PLT branch, so
     R_X86_64_PLT32 can stand in for R_X86_64_PC32 and serve as a
     marker for 32-bit PC-relative branches.  Other object formats
     never get it.  */
  if (!object_64bit)
    return FALSE;

  /* Weak and undefined symbols need PLT32.  Of the remaining (defined,
     non-weak) symbols only the global ones do; symbols with
     non-default visibility are treated like any other global so that
     PLT32 keeps marking 32-bit PC-relative branches, which is useful
     for linker relaxation.  */
  if (S_IS_WEAK (s) || !S_IS_DEFINED (s) || S_IS_EXTERNAL (s))
    return TRUE;

  return FALSE;
}
#endif
8111
8112 static void
8113 output_jump (void)
8114 {
8115   char *p;
8116   int size;
8117   fixS *fixP;
8118   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8119
8120   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8121     {
8122       /* This is a loop or jecxz type instruction.  */
8123       size = 1;
8124       if (i.prefix[ADDR_PREFIX] != 0)
8125         {
8126           FRAG_APPEND_1_CHAR (ADDR_PREFIX_OPCODE);
8127           i.prefixes -= 1;
8128         }
8129       /* Pentium4 branch hints.  */
8130       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8131           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8132         {
8133           FRAG_APPEND_1_CHAR (i.prefix[SEG_PREFIX]);
8134           i.prefixes--;
8135         }
8136     }
8137   else
8138     {
8139       int code16;
8140
8141       code16 = 0;
8142       if (flag_code == CODE_16BIT)
8143         code16 = CODE16;
8144
8145       if (i.prefix[DATA_PREFIX] != 0)
8146         {
8147           FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE);
8148           i.prefixes -= 1;
8149           code16 ^= flip_code16(code16);
8150         }
8151
8152       size = 4;
8153       if (code16)
8154         size = 2;
8155     }
8156
8157   /* BND prefixed jump.  */
8158   if (i.prefix[BND_PREFIX] != 0)
8159     {
8160       FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
8161       i.prefixes -= 1;
8162     }
8163
8164   if (i.prefix[REX_PREFIX] != 0)
8165     {
8166       FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
8167       i.prefixes -= 1;
8168     }
8169
8170   if (i.prefixes != 0)
8171     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8172
8173   p = frag_more (i.tm.opcode_length + size);
8174   switch (i.tm.opcode_length)
8175     {
8176     case 2:
8177       *p++ = i.tm.base_opcode >> 8;
8178       /* Fall through.  */
8179     case 1:
8180       *p++ = i.tm.base_opcode;
8181       break;
8182     default:
8183       abort ();
8184     }
8185
8186 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8187   if (size == 4
8188       && jump_reloc == NO_RELOC
8189       && need_plt32_p (i.op[0].disps->X_add_symbol))
8190     jump_reloc = BFD_RELOC_X86_64_PLT32;
8191 #endif
8192
8193   jump_reloc = reloc (size, 1, 1, jump_reloc);
8194
8195   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8196                       i.op[0].disps, 1, jump_reloc);
8197
8198   /* All jumps handled here are signed, but don't use a signed limit
8199      check for 32 and 16 bit jumps as we want to allow wrap around at
8200      4G and 64k respectively.  */
8201   if (size == 1)
8202     fixP->fx_signed = 1;
8203 }
8204
8205 static void
8206 output_interseg_jump (void)
8207 {
8208   char *p;
8209   int size;
8210   int prefix;
8211   int code16;
8212
8213   code16 = 0;
8214   if (flag_code == CODE_16BIT)
8215     code16 = CODE16;
8216
8217   prefix = 0;
8218   if (i.prefix[DATA_PREFIX] != 0)
8219     {
8220       prefix = 1;
8221       i.prefixes -= 1;
8222       code16 ^= CODE16;
8223     }
8224
8225   gas_assert (!i.prefix[REX_PREFIX]);
8226
8227   size = 4;
8228   if (code16)
8229     size = 2;
8230
8231   if (i.prefixes != 0)
8232     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8233
8234   /* 1 opcode; 2 segment; offset  */
8235   p = frag_more (prefix + 1 + 2 + size);
8236
8237   if (i.prefix[DATA_PREFIX] != 0)
8238     *p++ = DATA_PREFIX_OPCODE;
8239
8240   if (i.prefix[REX_PREFIX] != 0)
8241     *p++ = i.prefix[REX_PREFIX];
8242
8243   *p++ = i.tm.base_opcode;
8244   if (i.op[1].imms->X_op == O_constant)
8245     {
8246       offsetT n = i.op[1].imms->X_add_number;
8247
8248       if (size == 2
8249           && !fits_in_unsigned_word (n)
8250           && !fits_in_signed_word (n))
8251         {
8252           as_bad (_("16-bit jump out of range"));
8253           return;
8254         }
8255       md_number_to_chars (p, n, size);
8256     }
8257   else
8258     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8259                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
8260   if (i.op[0].imms->X_op != O_constant)
8261     as_bad (_("can't handle non absolute segment in `%s'"),
8262             i.tm.name);
8263   md_number_to_chars (p + size, (valueT) i.op[0].imms->X_add_number, 2);
8264 }
8265
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Write the .note.gnu.property section recording the accumulated
   x86_isa_1_used and x86_feature_2_used bits.  Does nothing unless the
   output is ELF and x86_used_note is set.  */

void
x86_cleanup (void)
{
  asection *saved_seg = now_seg;
  subsegT saved_subseg = now_subseg;
  asection *sec;
  unsigned int alignment, align_mask;
  unsigned int isa_1_raw, isa_1_size;
  unsigned int feature_2_raw, feature_2_size;
  unsigned int descsz;
  unsigned int padding;
  char *note;
  char *prop;

  if (!IS_ELF || !x86_used_note)
    return;

  x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;

  /* Layout of the note being produced:

       n_namsz    4 bytes          the value 4
       n_descsz   4 bytes          size of the descriptor
       n_type     4 bytes          NT_GNU_PROPERTY_TYPE_0
       n_name     4 bytes          "GNU"
       n_desc     n_descsz bytes   the program property array  */

  /* Create the .note.gnu.property section.  */
  sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
  bfd_set_section_flags (sec,
                         (SEC_ALLOC | SEC_LOAD | SEC_DATA
                          | SEC_HAS_CONTENTS | SEC_READONLY));

  /* Properties are aligned to 8 bytes for ELFCLASS64 output and to
     4 bytes otherwise.  */
  if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
    alignment = 3;
  else
    alignment = 2;
  align_mask = (1u << alignment) - 1;

  bfd_set_section_alignment (sec, alignment);
  elf_section_type (sec) = SHT_NOTE;

  /* Each property is a 4-byte type, a 4-byte data size and 4 bytes of
     data, rounded up to the alignment.  */
  isa_1_raw = 4 + 4 + 4;
  isa_1_size = (isa_1_raw + align_mask) & ~align_mask;

  /* The second property starts right after the padded first one.  */
  feature_2_raw = isa_1_size + 4 + 4 + 4;
  feature_2_size = (feature_2_raw + align_mask) & ~align_mask;

  descsz = feature_2_size;

  /* Four 4-byte header fields followed by the descriptor.  */
  note = frag_more (4 + 4 + 4 + 4 + descsz);

  /* n_namsz, n_descsz, n_type and n_name.  */
  md_number_to_chars (note, (valueT) 4, 4);
  md_number_to_chars (note + 4, (valueT) descsz, 4);
  md_number_to_chars (note + 8, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
  memcpy (note + 12, "GNU", 4);

  /* GNU_PROPERTY_X86_ISA_1_USED: type, data size, data, padding.  */
  prop = note + 16;
  md_number_to_chars (prop, (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
  md_number_to_chars (prop + 4, (valueT) 4, 4);
  md_number_to_chars (prop + 8, (valueT) x86_isa_1_used, 4);
  padding = isa_1_size - isa_1_raw;
  if (padding)
    memset (prop + 12, 0, padding);

  /* GNU_PROPERTY_X86_FEATURE_2_USED: type, data size, data, padding.  */
  prop = note + 16 + isa_1_size;
  md_number_to_chars (prop, (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
  md_number_to_chars (prop + 4, (valueT) 4, 4);
  md_number_to_chars (prop + 8, (valueT) x86_feature_2_used, 4);
  padding = feature_2_size - feature_2_raw;
  if (padding)
    memset (prop + 12, 0, padding);

  /* Restoring the current segment is probably not possible, since
     there likely isn't one yet; do it only when both the saved
     segment and subsegment are non-zero.  */
  if (saved_seg && saved_subseg)
    subseg_set (saved_seg, saved_subseg);
}
#endif
8386
8387 static unsigned int
8388 encoding_length (const fragS *start_frag, offsetT start_off,
8389                  const char *frag_now_ptr)
8390 {
8391   unsigned int len = 0;
8392
8393   if (start_frag != frag_now)
8394     {
8395       const fragS *fr = start_frag;
8396
8397       do {
8398         len += fr->fr_fix;
8399         fr = fr->fr_next;
8400       } while (fr && fr != frag_now);
8401     }
8402
8403   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
8404 }
8405
8406 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
8407    be macro-fused with conditional jumps.
8408    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
8409    or is one of the following format:
8410
8411     cmp m, imm
8412     add m, imm
8413     sub m, imm
8414    test m, imm
8415     and m, imm
8416     inc m
8417     dec m
8418
8419    it is unfusible.  */
8420
8421 static int
8422 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
8423 {
8424   /* No RIP address.  */
8425   if (i.base_reg && i.base_reg->reg_num == RegIP)
8426     return 0;
8427
8428   /* No VEX/EVEX encoding.  */
8429   if (is_any_vex_encoding (&i.tm))
8430     return 0;
8431
8432   /* add, sub without add/sub m, imm.  */
8433   if (i.tm.base_opcode <= 5
8434       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
8435       || ((i.tm.base_opcode | 3) == 0x83
8436           && (i.tm.extension_opcode == 0x5
8437               || i.tm.extension_opcode == 0x0)))
8438     {
8439       *mf_cmp_p = mf_cmp_alu_cmp;
8440       return !(i.mem_operands && i.imm_operands);
8441     }
8442
8443   /* and without and m, imm.  */
8444   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
8445       || ((i.tm.base_opcode | 3) == 0x83
8446           && i.tm.extension_opcode == 0x4))
8447     {
8448       *mf_cmp_p = mf_cmp_test_and;
8449       return !(i.mem_operands && i.imm_operands);
8450     }
8451
8452   /* test without test m imm.  */
8453   if ((i.tm.base_opcode | 1) == 0x85
8454       || (i.tm.base_opcode | 1) == 0xa9
8455       || ((i.tm.base_opcode | 1) == 0xf7
8456           && i.tm.extension_opcode == 0))
8457     {
8458       *mf_cmp_p = mf_cmp_test_and;
8459       return !(i.mem_operands && i.imm_operands);
8460     }
8461
8462   /* cmp without cmp m, imm.  */
8463   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
8464       || ((i.tm.base_opcode | 3) == 0x83
8465           && (i.tm.extension_opcode == 0x7)))
8466     {
8467       *mf_cmp_p = mf_cmp_alu_cmp;
8468       return !(i.mem_operands && i.imm_operands);
8469     }
8470
8471   /* inc, dec without inc/dec m.   */
8472   if ((i.tm.cpu_flags.bitfield.cpuno64
8473        && (i.tm.base_opcode | 0xf) == 0x4f)
8474       || ((i.tm.base_opcode | 1) == 0xff
8475           && i.tm.extension_opcode <= 0x1))
8476     {
8477       *mf_cmp_p = mf_cmp_incdec;
8478       return !i.mem_operands;
8479     }
8480
8481   return 0;
8482 }
8483
8484 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
8485
8486 static int
8487 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
8488 {
8489   /* NB: Don't work with COND_JUMP86 without i386.  */
8490   if (!align_branch_power
8491       || now_seg == absolute_section
8492       || !cpu_arch_flags.bitfield.cpui386
8493       || !(align_branch & align_branch_fused_bit))
8494     return 0;
8495
8496   if (maybe_fused_with_jcc_p (mf_cmp_p))
8497     {
8498       if (last_insn.kind == last_insn_other
8499           || last_insn.seg != now_seg)
8500         return 1;
8501       if (flag_debug)
8502         as_warn_where (last_insn.file, last_insn.line,
8503                        _("`%s` skips -malign-branch-boundary on `%s`"),
8504                        last_insn.name, i.tm.name);
8505     }
8506
8507   return 0;
8508 }
8509
8510 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
8511
8512 static int
8513 add_branch_prefix_frag_p (void)
8514 {
8515   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
8516      to PadLock instructions since they include prefixes in opcode.  */
8517   if (!align_branch_power
8518       || !align_branch_prefix_size
8519       || now_seg == absolute_section
8520       || i.tm.cpu_flags.bitfield.cpupadlock
8521       || !cpu_arch_flags.bitfield.cpui386)
8522     return 0;
8523
8524   /* Don't add prefix if it is a prefix or there is no operand in case
8525      that segment prefix is special.  */
8526   if (!i.operands || i.tm.opcode_modifier.isprefix)
8527     return 0;
8528
8529   if (last_insn.kind == last_insn_other
8530       || last_insn.seg != now_seg)
8531     return 1;
8532
8533   if (flag_debug)
8534     as_warn_where (last_insn.file, last_insn.line,
8535                    _("`%s` skips -malign-branch-boundary on `%s`"),
8536                    last_insn.name, i.tm.name);
8537
8538   return 0;
8539 }
8540
8541 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
8542
8543 static int
8544 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
8545                            enum mf_jcc_kind *mf_jcc_p)
8546 {
8547   int add_padding;
8548
8549   /* NB: Don't work with COND_JUMP86 without i386.  */
8550   if (!align_branch_power
8551       || now_seg == absolute_section
8552       || !cpu_arch_flags.bitfield.cpui386)
8553     return 0;
8554
8555   add_padding = 0;
8556
8557   /* Check for jcc and direct jmp.  */
8558   if (i.tm.opcode_modifier.jump == JUMP)
8559     {
8560       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
8561         {
8562           *branch_p = align_branch_jmp;
8563           add_padding = align_branch & align_branch_jmp_bit;
8564         }
8565       else
8566         {
8567           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
8568              igore the lowest bit.  */
8569           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
8570           *branch_p = align_branch_jcc;
8571           if ((align_branch & align_branch_jcc_bit))
8572             add_padding = 1;
8573         }
8574     }
8575   else if (is_any_vex_encoding (&i.tm))
8576     return 0;
8577   else if ((i.tm.base_opcode | 1) == 0xc3)
8578     {
8579       /* Near ret.  */
8580       *branch_p = align_branch_ret;
8581       if ((align_branch & align_branch_ret_bit))
8582         add_padding = 1;
8583     }
8584   else
8585     {
8586       /* Check for indirect jmp, direct and indirect calls.  */
8587       if (i.tm.base_opcode == 0xe8)
8588         {
8589           /* Direct call.  */
8590           *branch_p = align_branch_call;
8591           if ((align_branch & align_branch_call_bit))
8592             add_padding = 1;
8593         }
8594       else if (i.tm.base_opcode == 0xff
8595                && (i.tm.extension_opcode == 2
8596                    || i.tm.extension_opcode == 4))
8597         {
8598           /* Indirect call and jmp.  */
8599           *branch_p = align_branch_indirect;
8600           if ((align_branch & align_branch_indirect_bit))
8601             add_padding = 1;
8602         }
8603
8604       if (add_padding
8605           && i.disp_operands
8606           && tls_get_addr
8607           && (i.op[0].disps->X_op == O_symbol
8608               || (i.op[0].disps->X_op == O_subtract
8609                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
8610         {
8611           symbolS *s = i.op[0].disps->X_add_symbol;
8612           /* No padding to call to global or undefined tls_get_addr.  */
8613           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
8614               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
8615             return 0;
8616         }
8617     }
8618
8619   if (add_padding
8620       && last_insn.kind != last_insn_other
8621       && last_insn.seg == now_seg)
8622     {
8623       if (flag_debug)
8624         as_warn_where (last_insn.file, last_insn.line,
8625                        _("`%s` skips -malign-branch-boundary on `%s`"),
8626                        last_insn.name, i.tm.name);
8627       return 0;
8628     }
8629
8630   return add_padding;
8631 }
8632
8633 static void
8634 output_insn (void)
8635 {
8636   fragS *insn_start_frag;
8637   offsetT insn_start_off;
8638   fragS *fragP = NULL;
8639   enum align_branch_kind branch = align_branch_none;
8640   /* The initializer is arbitrary just to avoid uninitialized error.
8641      it's actually either assigned in add_branch_padding_frag_p
8642      or never be used.  */
8643   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
8644
8645 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8646   if (IS_ELF && x86_used_note)
8647     {
8648       if (i.tm.cpu_flags.bitfield.cpucmov)
8649         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV;
8650       if (i.tm.cpu_flags.bitfield.cpusse)
8651         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE;
8652       if (i.tm.cpu_flags.bitfield.cpusse2)
8653         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2;
8654       if (i.tm.cpu_flags.bitfield.cpusse3)
8655         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3;
8656       if (i.tm.cpu_flags.bitfield.cpussse3)
8657         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3;
8658       if (i.tm.cpu_flags.bitfield.cpusse4_1)
8659         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1;
8660       if (i.tm.cpu_flags.bitfield.cpusse4_2)
8661         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2;
8662       if (i.tm.cpu_flags.bitfield.cpuavx)
8663         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX;
8664       if (i.tm.cpu_flags.bitfield.cpuavx2)
8665         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX2;
8666       if (i.tm.cpu_flags.bitfield.cpufma)
8667         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_FMA;
8668       if (i.tm.cpu_flags.bitfield.cpuavx512f)
8669         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512F;
8670       if (i.tm.cpu_flags.bitfield.cpuavx512cd)
8671         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512CD;
8672       if (i.tm.cpu_flags.bitfield.cpuavx512er)
8673         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512ER;
8674       if (i.tm.cpu_flags.bitfield.cpuavx512pf)
8675         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512PF;
8676       if (i.tm.cpu_flags.bitfield.cpuavx512vl)
8677         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512VL;
8678       if (i.tm.cpu_flags.bitfield.cpuavx512dq)
8679         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512DQ;
8680       if (i.tm.cpu_flags.bitfield.cpuavx512bw)
8681         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512BW;
8682       if (i.tm.cpu_flags.bitfield.cpuavx512_4fmaps)
8683         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS;
8684       if (i.tm.cpu_flags.bitfield.cpuavx512_4vnniw)
8685         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW;
8686       if (i.tm.cpu_flags.bitfield.cpuavx512_bitalg)
8687         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BITALG;
8688       if (i.tm.cpu_flags.bitfield.cpuavx512ifma)
8689         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_IFMA;
8690       if (i.tm.cpu_flags.bitfield.cpuavx512vbmi)
8691         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI;
8692       if (i.tm.cpu_flags.bitfield.cpuavx512_vbmi2)
8693         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
8694       if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
8695         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
8696       if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
8697         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
8698
8699       if (i.tm.cpu_flags.bitfield.cpu8087
8700           || i.tm.cpu_flags.bitfield.cpu287
8701           || i.tm.cpu_flags.bitfield.cpu387
8702           || i.tm.cpu_flags.bitfield.cpu687
8703           || i.tm.cpu_flags.bitfield.cpufisttp)
8704         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
8705       if (i.has_regmmx
8706           || i.tm.base_opcode == 0xf77 /* emms */
8707           || i.tm.base_opcode == 0xf0e /* femms */
8708           || i.tm.base_opcode == 0xf2a /* cvtpi2ps */
8709           || i.tm.base_opcode == 0x660f2a /* cvtpi2pd */)
8710         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
8711       if (i.has_regxmm)
8712         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
8713       if (i.has_regymm)
8714         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
8715       if (i.has_regzmm)
8716         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
8717       if (i.tm.cpu_flags.bitfield.cpufxsr)
8718         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
8719       if (i.tm.cpu_flags.bitfield.cpuxsave)
8720         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
8721       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
8722         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
8723       if (i.tm.cpu_flags.bitfield.cpuxsavec)
8724         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
8725     }
8726 #endif
8727
8728   /* Tie dwarf2 debug info to the address at the start of the insn.
8729      We can't do this after the insn has been output as the current
8730      frag may have been closed off.  eg. by frag_var.  */
8731   dwarf2_emit_insn (0);
8732
8733   insn_start_frag = frag_now;
8734   insn_start_off = frag_now_fix ();
8735
8736   if (add_branch_padding_frag_p (&branch, &mf_jcc))
8737     {
8738       char *p;
8739       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
8740       unsigned int max_branch_padding_size = 14;
8741
8742       /* Align section to boundary.  */
8743       record_alignment (now_seg, align_branch_power);
8744
8745       /* Make room for padding.  */
8746       frag_grow (max_branch_padding_size);
8747
8748       /* Start of the padding.  */
8749       p = frag_more (0);
8750
8751       fragP = frag_now;
8752
8753       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
8754                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
8755                 NULL, 0, p);
8756
8757       fragP->tc_frag_data.mf_type = mf_jcc;
8758       fragP->tc_frag_data.branch_type = branch;
8759       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
8760     }
8761
8762   /* Output jumps.  */
8763   if (i.tm.opcode_modifier.jump == JUMP)
8764     output_branch ();
8765   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
8766            || i.tm.opcode_modifier.jump == JUMP_DWORD)
8767     output_jump ();
8768   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
8769     output_interseg_jump ();
8770   else
8771     {
8772       /* Output normal instructions here.  */
8773       char *p;
8774       unsigned char *q;
8775       unsigned int j;
8776       unsigned int prefix;
8777       enum mf_cmp_kind mf_cmp;
8778
8779       if (avoid_fence
8780           && (i.tm.base_opcode == 0xfaee8
8781               || i.tm.base_opcode == 0xfaef0
8782               || i.tm.base_opcode == 0xfaef8))
8783         {
8784           /* Encode lfence, mfence, and sfence as
8785              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
8786           offsetT val = 0x240483f0ULL;
8787           p = frag_more (5);
8788           md_number_to_chars (p, val, 5);
8789           return;
8790         }
8791
8792       /* Some processors fail on LOCK prefix. This options makes
8793          assembler ignore LOCK prefix and serves as a workaround.  */
8794       if (omit_lock_prefix)
8795         {
8796           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
8797             return;
8798           i.prefix[LOCK_PREFIX] = 0;
8799         }
8800
8801       if (branch)
8802         /* Skip if this is a branch.  */
8803         ;
8804       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
8805         {
8806           /* Make room for padding.  */
8807           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
8808           p = frag_more (0);
8809
8810           fragP = frag_now;
8811
8812           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
8813                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
8814                     NULL, 0, p);
8815
8816           fragP->tc_frag_data.mf_type = mf_cmp;
8817           fragP->tc_frag_data.branch_type = align_branch_fused;
8818           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
8819         }
8820       else if (add_branch_prefix_frag_p ())
8821         {
8822           unsigned int max_prefix_size = align_branch_prefix_size;
8823
8824           /* Make room for padding.  */
8825           frag_grow (max_prefix_size);
8826           p = frag_more (0);
8827
8828           fragP = frag_now;
8829
8830           frag_var (rs_machine_dependent, max_prefix_size, 0,
8831                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
8832                     NULL, 0, p);
8833
8834           fragP->tc_frag_data.max_bytes = max_prefix_size;
8835         }
8836
8837       /* Since the VEX/EVEX prefix contains the implicit prefix, we
8838          don't need the explicit prefix.  */
8839       if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
8840         {
8841           switch (i.tm.opcode_length)
8842             {
8843             case 3:
8844               if (i.tm.base_opcode & 0xff000000)
8845                 {
8846                   prefix = (i.tm.base_opcode >> 24) & 0xff;
8847                   if (!i.tm.cpu_flags.bitfield.cpupadlock
8848                       || prefix != REPE_PREFIX_OPCODE
8849                       || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
8850                     add_prefix (prefix);
8851                 }
8852               break;
8853             case 2:
8854               if ((i.tm.base_opcode & 0xff0000) != 0)
8855                 {
8856                   prefix = (i.tm.base_opcode >> 16) & 0xff;
8857                   add_prefix (prefix);
8858                 }
8859               break;
8860             case 1:
8861               break;
8862             case 0:
8863               /* Check for pseudo prefixes.  */
8864               as_bad_where (insn_start_frag->fr_file,
8865                             insn_start_frag->fr_line,
8866                              _("pseudo prefix without instruction"));
8867               return;
8868             default:
8869               abort ();
8870             }
8871
8872 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
8873           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
8874              R_X86_64_GOTTPOFF relocation so that linker can safely
8875              perform IE->LE optimization.  A dummy REX_OPCODE prefix
8876              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
8877              relocation for GDesc -> IE/LE optimization.  */
8878           if (x86_elf_abi == X86_64_X32_ABI
8879               && i.operands == 2
8880               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
8881                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
8882               && i.prefix[REX_PREFIX] == 0)
8883             add_prefix (REX_OPCODE);
8884 #endif
8885
8886           /* The prefix bytes.  */
8887           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
8888             if (*q)
8889               FRAG_APPEND_1_CHAR (*q);
8890         }
8891       else
8892         {
8893           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
8894             if (*q)
8895               switch (j)
8896                 {
8897                 case REX_PREFIX:
8898                   /* REX byte is encoded in VEX prefix.  */
8899                   break;
8900                 case SEG_PREFIX:
8901                 case ADDR_PREFIX:
8902                   FRAG_APPEND_1_CHAR (*q);
8903                   break;
8904                 default:
8905                   /* There should be no other prefixes for instructions
8906                      with VEX prefix.  */
8907                   abort ();
8908                 }
8909
8910           /* For EVEX instructions i.vrex should become 0 after
8911              build_evex_prefix.  For VEX instructions upper 16 registers
8912              aren't available, so VREX should be 0.  */
8913           if (i.vrex)
8914             abort ();
8915           /* Now the VEX prefix.  */
8916           p = frag_more (i.vex.length);
8917           for (j = 0; j < i.vex.length; j++)
8918             p[j] = i.vex.bytes[j];
8919         }
8920
8921       /* Now the opcode; be careful about word order here!  */
8922       if (i.tm.opcode_length == 1)
8923         {
8924           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
8925         }
8926       else
8927         {
8928           switch (i.tm.opcode_length)
8929             {
8930             case 4:
8931               p = frag_more (4);
8932               *p++ = (i.tm.base_opcode >> 24) & 0xff;
8933               *p++ = (i.tm.base_opcode >> 16) & 0xff;
8934               break;
8935             case 3:
8936               p = frag_more (3);
8937               *p++ = (i.tm.base_opcode >> 16) & 0xff;
8938               break;
8939             case 2:
8940               p = frag_more (2);
8941               break;
8942             default:
8943               abort ();
8944               break;
8945             }
8946
8947           /* Put out high byte first: can't use md_number_to_chars!  */
8948           *p++ = (i.tm.base_opcode >> 8) & 0xff;
8949           *p = i.tm.base_opcode & 0xff;
8950         }
8951
8952       /* Now the modrm byte and sib byte (if present).  */
8953       if (i.tm.opcode_modifier.modrm)
8954         {
8955           FRAG_APPEND_1_CHAR ((i.rm.regmem << 0
8956                                | i.rm.reg << 3
8957                                | i.rm.mode << 6));
8958           /* If i.rm.regmem == ESP (4)
8959              && i.rm.mode != (Register mode)
8960              && not 16 bit
8961              ==> need second modrm byte.  */
8962           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
8963               && i.rm.mode != 3
8964               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
8965             FRAG_APPEND_1_CHAR ((i.sib.base << 0
8966                                  | i.sib.index << 3
8967                                  | i.sib.scale << 6));
8968         }
8969
8970       if (i.disp_operands)
8971         output_disp (insn_start_frag, insn_start_off);
8972
8973       if (i.imm_operands)
8974         output_imm (insn_start_frag, insn_start_off);
8975
8976       /*
8977        * frag_now_fix () returning plain abs_section_offset when we're in the
8978        * absolute section, and abs_section_offset not getting updated as data
8979        * gets added to the frag breaks the logic below.
8980        */
8981       if (now_seg != absolute_section)
8982         {
8983           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
8984           if (j > 15)
8985             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
8986                      j);
8987           else if (fragP)
8988             {
8989               /* NB: Don't add prefix with GOTPC relocation since
8990                  output_disp() above depends on the fixed encoding
8991                  length.  Can't add prefix with TLS relocation since
8992                  it breaks TLS linker optimization.  */
8993               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
8994               /* Prefix count on the current instruction.  */
8995               unsigned int count = i.vex.length;
8996               unsigned int k;
8997               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
8998                 /* REX byte is encoded in VEX/EVEX prefix.  */
8999                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9000                   count++;
9001
9002               /* Count prefixes for extended opcode maps.  */
9003               if (!i.vex.length)
9004                 switch (i.tm.opcode_length)
9005                   {
9006                   case 3:
9007                     if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
9008                       {
9009                         count++;
9010                         switch ((i.tm.base_opcode >> 8) & 0xff)
9011                           {
9012                           case 0x38:
9013                           case 0x3a:
9014                             count++;
9015                             break;
9016                           default:
9017                             break;
9018                           }
9019                       }
9020                     break;
9021                   case 2:
9022                     if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
9023                       count++;
9024                     break;
9025                   case 1:
9026                     break;
9027                   default:
9028                     abort ();
9029                   }
9030
9031               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9032                   == BRANCH_PREFIX)
9033                 {
9034                   /* Set the maximum prefix size in BRANCH_PREFIX
9035                      frag.  */
9036                   if (fragP->tc_frag_data.max_bytes > max)
9037                     fragP->tc_frag_data.max_bytes = max;
9038                   if (fragP->tc_frag_data.max_bytes > count)
9039                     fragP->tc_frag_data.max_bytes -= count;
9040                   else
9041                     fragP->tc_frag_data.max_bytes = 0;
9042                 }
9043               else
9044                 {
9045                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
9046                      frag.  */
9047                   unsigned int max_prefix_size;
9048                   if (align_branch_prefix_size > max)
9049                     max_prefix_size = max;
9050                   else
9051                     max_prefix_size = align_branch_prefix_size;
9052                   if (max_prefix_size > count)
9053                     fragP->tc_frag_data.max_prefix_length
9054                       = max_prefix_size - count;
9055                 }
9056
9057               /* Use existing segment prefix if possible.  Use CS
9058                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
9059                  segment prefix with ESP/EBP base register and use DS
9060                  segment prefix without ESP/EBP base register.  */
9061               if (i.prefix[SEG_PREFIX])
9062                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9063               else if (flag_code == CODE_64BIT)
9064                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9065               else if (i.base_reg
9066                        && (i.base_reg->reg_num == 4
9067                            || i.base_reg->reg_num == 5))
9068                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9069               else
9070                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9071             }
9072         }
9073     }
9074
9075   /* NB: Don't work with COND_JUMP86 without i386.  */
9076   if (align_branch_power
9077       && now_seg != absolute_section
9078       && cpu_arch_flags.bitfield.cpui386)
9079     {
9080       /* Terminate each frag so that we can add prefix and check for
9081          fused jcc.  */
9082       frag_wane (frag_now);
9083       frag_new (0);
9084     }
9085
9086 #ifdef DEBUG386
9087   if (flag_debug)
9088     {
9089       pi ("" /*line*/, &i);
9090     }
9091 #endif /* DEBUG386  */
9092 }
9093
9094 /* Return the size of the displacement operand N.  */
9095
9096 static int
9097 disp_size (unsigned int n)
9098 {
9099   int size = 4;
9100
9101   if (i.types[n].bitfield.disp64)
9102     size = 8;
9103   else if (i.types[n].bitfield.disp8)
9104     size = 1;
9105   else if (i.types[n].bitfield.disp16)
9106     size = 2;
9107   return size;
9108 }
9109
9110 /* Return the size of the immediate operand N.  */
9111
9112 static int
9113 imm_size (unsigned int n)
9114 {
9115   int size = 4;
9116   if (i.types[n].bitfield.imm64)
9117     size = 8;
9118   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9119     size = 1;
9120   else if (i.types[n].bitfield.imm16)
9121     size = 2;
9122   return size;
9123 }
9124
9125 static void
9126 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9127 {
9128   char *p;
9129   unsigned int n;
9130
9131   for (n = 0; n < i.operands; n++)
9132     {
9133       if (operand_type_check (i.types[n], disp))
9134         {
9135           if (i.op[n].disps->X_op == O_constant)
9136             {
9137               int size = disp_size (n);
9138               offsetT val = i.op[n].disps->X_add_number;
9139
9140               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9141                                      size);
9142               p = frag_more (size);
9143               md_number_to_chars (p, val, size);
9144             }
9145           else
9146             {
9147               enum bfd_reloc_code_real reloc_type;
9148               int size = disp_size (n);
9149               int sign = i.types[n].bitfield.disp32s;
9150               int pcrel = (i.flags[n] & Operand_PCrel) != 0;
9151               fixS *fixP;
9152
9153               /* We can't have 8 bit displacement here.  */
9154               gas_assert (!i.types[n].bitfield.disp8);
9155
9156               /* The PC relative address is computed relative
9157                  to the instruction boundary, so in case immediate
9158                  fields follows, we need to adjust the value.  */
9159               if (pcrel && i.imm_operands)
9160                 {
9161                   unsigned int n1;
9162                   int sz = 0;
9163
9164                   for (n1 = 0; n1 < i.operands; n1++)
9165                     if (operand_type_check (i.types[n1], imm))
9166                       {
9167                         /* Only one immediate is allowed for PC
9168                            relative address.  */
9169                         gas_assert (sz == 0);
9170                         sz = imm_size (n1);
9171                         i.op[n].disps->X_add_number -= sz;
9172                       }
9173                   /* We should find the immediate.  */
9174                   gas_assert (sz != 0);
9175                 }
9176
9177               p = frag_more (size);
9178               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9179               if (GOT_symbol
9180                   && GOT_symbol == i.op[n].disps->X_add_symbol
9181                   && (((reloc_type == BFD_RELOC_32
9182                         || reloc_type == BFD_RELOC_X86_64_32S
9183                         || (reloc_type == BFD_RELOC_64
9184                             && object_64bit))
9185                        && (i.op[n].disps->X_op == O_symbol
9186                            || (i.op[n].disps->X_op == O_add
9187                                && ((symbol_get_value_expression
9188                                     (i.op[n].disps->X_op_symbol)->X_op)
9189                                    == O_subtract))))
9190                       || reloc_type == BFD_RELOC_32_PCREL))
9191                 {
9192                   if (!object_64bit)
9193                     {
9194                       reloc_type = BFD_RELOC_386_GOTPC;
9195                       i.has_gotpc_tls_reloc = TRUE;
9196                       i.op[n].imms->X_add_number +=
9197                         encoding_length (insn_start_frag, insn_start_off, p);
9198                     }
9199                   else if (reloc_type == BFD_RELOC_64)
9200                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
9201                   else
9202                     /* Don't do the adjustment for x86-64, as there
9203                        the pcrel addressing is relative to the _next_
9204                        insn, and that is taken care of in other code.  */
9205                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
9206                 }
9207               else if (align_branch_power)
9208                 {
9209                   switch (reloc_type)
9210                     {
9211                     case BFD_RELOC_386_TLS_GD:
9212                     case BFD_RELOC_386_TLS_LDM:
9213                     case BFD_RELOC_386_TLS_IE:
9214                     case BFD_RELOC_386_TLS_IE_32:
9215                     case BFD_RELOC_386_TLS_GOTIE:
9216                     case BFD_RELOC_386_TLS_GOTDESC:
9217                     case BFD_RELOC_386_TLS_DESC_CALL:
9218                     case BFD_RELOC_X86_64_TLSGD:
9219                     case BFD_RELOC_X86_64_TLSLD:
9220                     case BFD_RELOC_X86_64_GOTTPOFF:
9221                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
9222                     case BFD_RELOC_X86_64_TLSDESC_CALL:
9223                       i.has_gotpc_tls_reloc = TRUE;
9224                     default:
9225                       break;
9226                     }
9227                 }
9228               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
9229                                   size, i.op[n].disps, pcrel,
9230                                   reloc_type);
9231               /* Check for "call/jmp *mem", "mov mem, %reg",
9232                  "test %reg, mem" and "binop mem, %reg" where binop
9233                  is one of adc, add, and, cmp, or, sbb, sub, xor
9234                  instructions without data prefix.  Always generate
9235                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
9236               if (i.prefix[DATA_PREFIX] == 0
9237                   && (generate_relax_relocations
9238                       || (!object_64bit
9239                           && i.rm.mode == 0
9240                           && i.rm.regmem == 5))
9241                   && (i.rm.mode == 2
9242                       || (i.rm.mode == 0 && i.rm.regmem == 5))
9243                   && !is_any_vex_encoding(&i.tm)
9244                   && ((i.operands == 1
9245                        && i.tm.base_opcode == 0xff
9246                        && (i.rm.reg == 2 || i.rm.reg == 4))
9247                       || (i.operands == 2
9248                           && (i.tm.base_opcode == 0x8b
9249                               || i.tm.base_opcode == 0x85
9250                               || (i.tm.base_opcode & ~0x38) == 0x03))))
9251                 {
9252                   if (object_64bit)
9253                     {
9254                       fixP->fx_tcbit = i.rex != 0;
9255                       if (i.base_reg
9256                           && (i.base_reg->reg_num == RegIP))
9257                       fixP->fx_tcbit2 = 1;
9258                     }
9259                   else
9260                     fixP->fx_tcbit2 = 1;
9261                 }
9262             }
9263         }
9264     }
9265 }
9266
9267 static void
9268 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
9269 {
9270   char *p;
9271   unsigned int n;
9272
9273   for (n = 0; n < i.operands; n++)
9274     {
9275       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
9276       if (i.rounding && (int) n == i.rounding->operand)
9277         continue;
9278
9279       if (operand_type_check (i.types[n], imm))
9280         {
9281           if (i.op[n].imms->X_op == O_constant)
9282             {
9283               int size = imm_size (n);
9284               offsetT val;
9285
9286               val = offset_in_range (i.op[n].imms->X_add_number,
9287                                      size);
9288               p = frag_more (size);
9289               md_number_to_chars (p, val, size);
9290             }
9291           else
9292             {
9293               /* Not absolute_section.
9294                  Need a 32-bit fixup (don't support 8bit
9295                  non-absolute imms).  Try to support other
9296                  sizes ...  */
9297               enum bfd_reloc_code_real reloc_type;
9298               int size = imm_size (n);
9299               int sign;
9300
9301               if (i.types[n].bitfield.imm32s
9302                   && (i.suffix == QWORD_MNEM_SUFFIX
9303                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
9304                 sign = 1;
9305               else
9306                 sign = 0;
9307
9308               p = frag_more (size);
9309               reloc_type = reloc (size, 0, sign, i.reloc[n]);
9310
9311               /*   This is tough to explain.  We end up with this one if we
9312                * have operands that look like
9313                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
9314                * obtain the absolute address of the GOT, and it is strongly
9315                * preferable from a performance point of view to avoid using
9316                * a runtime relocation for this.  The actual sequence of
9317                * instructions often look something like:
9318                *
9319                *        call    .L66
9320                * .L66:
9321                *        popl    %ebx
9322                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
9323                *
9324                *   The call and pop essentially return the absolute address
9325                * of the label .L66 and store it in %ebx.  The linker itself
9326                * will ultimately change the first operand of the addl so
9327                * that %ebx points to the GOT, but to keep things simple, the
9328                * .o file must have this operand set so that it generates not
9329                * the absolute address of .L66, but the absolute address of
9330                * itself.  This allows the linker itself simply treat a GOTPC
9331                * relocation as asking for a pcrel offset to the GOT to be
9332                * added in, and the addend of the relocation is stored in the
9333                * operand field for the instruction itself.
9334                *
9335                *   Our job here is to fix the operand so that it would add
9336                * the correct offset so that %ebx would point to itself.  The
9337                * thing that is tricky is that .-.L66 will point to the
9338                * beginning of the instruction, so we need to further modify
9339                * the operand so that it will point to itself.  There are
9340                * other cases where you have something like:
9341                *
9342                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
9343                *
9344                * and here no correction would be required.  Internally in
9345                * the assembler we treat operands of this form as not being
9346                * pcrel since the '.' is explicitly mentioned, and I wonder
9347                * whether it would simplify matters to do it this way.  Who
9348                * knows.  In earlier versions of the PIC patches, the
9349                * pcrel_adjust field was used to store the correction, but
9350                * since the expression is not pcrel, I felt it would be
9351                * confusing to do it this way.  */
9352
9353               if ((reloc_type == BFD_RELOC_32
9354                    || reloc_type == BFD_RELOC_X86_64_32S
9355                    || reloc_type == BFD_RELOC_64)
9356                   && GOT_symbol
9357                   && GOT_symbol == i.op[n].imms->X_add_symbol
9358                   && (i.op[n].imms->X_op == O_symbol
9359                       || (i.op[n].imms->X_op == O_add
9360                           && ((symbol_get_value_expression
9361                                (i.op[n].imms->X_op_symbol)->X_op)
9362                               == O_subtract))))
9363                 {
9364                   if (!object_64bit)
9365                     reloc_type = BFD_RELOC_386_GOTPC;
9366                   else if (size == 4)
9367                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
9368                   else if (size == 8)
9369                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
9370                   i.has_gotpc_tls_reloc = TRUE;
9371                   i.op[n].imms->X_add_number +=
9372                     encoding_length (insn_start_frag, insn_start_off, p);
9373                 }
9374               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9375                            i.op[n].imms, 0, reloc_type);
9376             }
9377         }
9378     }
9379 }
9380 \f
9381 /* x86_cons_fix_new is called via the expression parsing code when a
9382    reloc is needed.  We use this hook to get the correct .got reloc.  */
9383 static int cons_sign = -1;
9384
9385 void
9386 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
9387                   expressionS *exp, bfd_reloc_code_real_type r)
9388 {
9389   r = reloc (len, 0, cons_sign, r);
9390
9391 #ifdef TE_PE
9392   if (exp->X_op == O_secrel)
9393     {
9394       exp->X_op = O_symbol;
9395       r = BFD_RELOC_32_SECREL;
9396     }
9397 #endif
9398
9399   fix_new_exp (frag, off, len, exp, 0, r);
9400 }
9401
9402 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
9403    purpose of the `.dc.a' internal pseudo-op.  */
9404
9405 int
9406 x86_address_bytes (void)
9407 {
9408   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
9409     return 4;
9410   return stdoutput->arch_info->bits_per_address / 8;
9411 }
9412
#if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
    || defined (LEX_AT)
# define lex_got(reloc, adjust, types) NULL
#else
/* Parse operands of the form
   <symbol>@GOTOFF+<nnn>
   and similar .plt or .got references.

   The text at input_line_pointer is scanned up to the first `@'; if an
   end-of-line character or a comma is met first, there is nothing to
   do and NULL is returned.

   If a known relocation token follows the `@', store the matching
   relocation in *REL and copy the input string, minus the `@TOKEN',
   into a malloc'd buffer for parsing by the calling routine.  Return
   this buffer (the caller must free it), and if ADJUST is non-null set
   it to the number of characters by which the copy is shorter than the
   original text.  If TYPES is non-null it is updated with the operand
   types the relocation permits.

   Return NULL when no token matches (it might be a symbol version
   string), or, after as_bad, when the token is not supported for the
   current output format.  */
static char *
lex_got (enum bfd_reloc_code_real *rel,
         int *adjust,
         i386_operand_type *types)
{
  /* Some of the relocations depend on the size of what field is to
     be relocated.  But in our callers i386_immediate and i386_displacement
     we don't yet know the operand size (this will be set by insn
     matching).  Hence we record the word32 relocation here,
     and adjust the reloc according to the real size in reloc().

     Tokens are matched by prefix in table order, so a token must be
     listed before any other token that is a prefix of it (PLTOFF
     before PLT, GOTPLT before GOT, ...).  rel[0] is used for 32-bit
     output and rel[1] for 64-bit output; a zero entry means the token
     is not available for that format.  */
  static const struct {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
  } gotrel[] = {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
                                        BFD_RELOC_SIZE32 },
      OPERAND_TYPE_IMM32_64 },
#endif
    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_PLTOFF64 },
      OPERAND_TYPE_IMM64 },
    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
                                       BFD_RELOC_X86_64_PLT32    },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPLT64 },
      OPERAND_TYPE_IMM64_DISP64 },
    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
                                       BFD_RELOC_X86_64_GOTOFF64 },
      OPERAND_TYPE_IMM64_DISP64 },
    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPCREL },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
                                       BFD_RELOC_X86_64_TLSGD    },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_TLSLD    },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
                                       BFD_RELOC_X86_64_GOTTPOFF },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
                                       BFD_RELOC_X86_64_TPOFF32  },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
                                       BFD_RELOC_X86_64_DTPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
                                       BFD_RELOC_X86_64_GOT32    },
      OPERAND_TYPE_IMM32_32S_64_DISP32 },
    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
                                       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
                                       BFD_RELOC_X86_64_TLSDESC_CALL },
      OPERAND_TYPE_IMM32_32S_DISP32 },
  };
  char *cp;
  unsigned int j;

#if defined (OBJ_MAYBE_ELF)
  if (!IS_ELF)
    return NULL;
#endif

  /* Locate the `@' that introduces the relocation token, giving up at
     the end of the operand.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;
      int first, second;
      char *tmpbuf, *past_reloc;

      if (strncasecmp (cp + 1, gotrel[j].str, len) != 0)
        continue;

      if (gotrel[j].rel[object_64bit] == 0)
        {
          as_bad (_("@%s reloc is not supported with %d-bit output format"),
                  gotrel[j].str, 1 << (5 + object_64bit));
          return NULL;
        }

      *rel = gotrel[j].rel[object_64bit];

      if (types)
        {
          if (flag_code != CODE_64BIT)
            {
              types->bitfield.imm32 = 1;
              types->bitfield.disp32 = 1;
            }
          else
            *types = gotrel[j].types64;
        }

      /* Every token except @SIZE makes sure the GOT symbol exists.
         Test the relocation rather than the table index: the SIZE row
         is only present in ELF configurations, so index 0 is a
         different token otherwise.  */
      if (gotrel[j].rel[object_64bit] != BFD_RELOC_SIZE32
          && GOT_symbol == NULL)
        GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);

      /* The length of the first part of our input line.  */
      first = cp - input_line_pointer;

      /* The second part goes from after the reloc token until
         (and including) an end_of_line char or comma.  */
      past_reloc = cp + 1 + len;
      cp = past_reloc;
      while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
        ++cp;
      second = cp + 1 - past_reloc;

      /* Allocate and copy string.  The trailing NUL shouldn't
         be necessary, but be safe.  */
      tmpbuf = XNEWVEC (char, first + second + 2);
      memcpy (tmpbuf, input_line_pointer, first);
      if (second != 0 && *past_reloc != ' ')
        /* Replace the relocation token with ' ', so that
           errors like foo@GOTOFF1 will be detected.  */
        tmpbuf[first++] = ' ';
      else
        /* Increment length by 1 if the relocation token is
           removed.  */
        len++;
      if (adjust)
        *adjust = len;
      memcpy (tmpbuf + first, past_reloc, second);
      tmpbuf[first + second] = '\0';
      return tmpbuf;
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}
#endif
9577
#ifdef TE_PE
#ifdef lex_got
#undef lex_got
#endif
/* Parse operands of the form
   <symbol>@SECREL32+<nnn>

   If we find one, set up the correct relocation in REL and copy the
   input string, minus the `@SECREL32' into a malloc'd buffer for
   parsing by the calling routine.  Return this buffer (the caller
   must free it), and if ADJUST is non-null set it to the number of
   characters by which the copy is shorter than the original text.
   If TYPES is non-null it is updated with the operand types the
   relocation permits.  Otherwise return NULL.

   This function is copied from the ELF version above adjusted for PE
   targets.  */

static char *
lex_got (enum bfd_reloc_code_real *rel,
         int *adjust,
         i386_operand_type *types)
{
  /* rel[0] is used for 32-bit output and rel[1] for 64-bit output.  */
  static const struct
  {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
  }
  gotrel[] =
  {
    { STRING_COMMA_LEN ("SECREL32"),    { BFD_RELOC_32_SECREL,
                                          BFD_RELOC_32_SECREL },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
  };

  char *cp;
  unsigned j;

  /* Locate the `@' that introduces the relocation token, giving up at
     the end of the operand.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;
      int first, second;
      char *tmpbuf, *past_reloc;

      if (strncasecmp (cp + 1, gotrel[j].str, len) != 0)
        continue;

      if (gotrel[j].rel[object_64bit] == 0)
        {
          as_bad (_("@%s reloc is not supported with %d-bit output format"),
                  gotrel[j].str, 1 << (5 + object_64bit));
          return NULL;
        }

      *rel = gotrel[j].rel[object_64bit];

      if (types)
        {
          if (flag_code != CODE_64BIT)
            {
              types->bitfield.imm32 = 1;
              types->bitfield.disp32 = 1;
            }
          else
            *types = gotrel[j].types64;
        }

      /* The length of the first part of our input line.  */
      first = cp - input_line_pointer;

      /* The second part goes from after the reloc token until
         (and including) an end_of_line char or comma.  */
      past_reloc = cp + 1 + len;
      cp = past_reloc;
      while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
        ++cp;
      second = cp + 1 - past_reloc;

      /* Allocate and copy string.  The trailing NUL shouldn't
         be necessary, but be safe.  */
      tmpbuf = XNEWVEC (char, first + second + 2);
      memcpy (tmpbuf, input_line_pointer, first);
      if (second != 0 && *past_reloc != ' ')
        /* Replace the relocation token with ' ', so that
           errors like foo@SECREL321 will be detected.  */
        tmpbuf[first++] = ' ';
      else
        /* Nothing was substituted for the `@' plus token, so one more
           character was removed than the token length alone.  */
        len++;
      /* Report the removed length only now that it is final, so the
         caller can map positions in the copy back to the original.  */
      if (adjust)
        *adjust = len;
      memcpy (tmpbuf + first, past_reloc, second);
      tmpbuf[first + second] = '\0';
      return tmpbuf;
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}

#endif /* TE_PE */
9680
9681 bfd_reloc_code_real_type
9682 x86_cons (expressionS *exp, int size)
9683 {
9684   bfd_reloc_code_real_type got_reloc = NO_RELOC;
9685
9686   intel_syntax = -intel_syntax;
9687
9688   exp->X_md = 0;
9689   if (size == 4 || (object_64bit && size == 8))
9690     {
9691       /* Handle @GOTOFF and the like in an expression.  */
9692       char *save;
9693       char *gotfree_input_line;
9694       int adjust = 0;
9695
9696       save = input_line_pointer;
9697       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
9698       if (gotfree_input_line)
9699         input_line_pointer = gotfree_input_line;
9700
9701       expression (exp);
9702
9703       if (gotfree_input_line)
9704         {
9705           /* expression () has merrily parsed up to the end of line,
9706              or a comma - in the wrong buffer.  Transfer how far
9707              input_line_pointer has moved to the right buffer.  */
9708           input_line_pointer = (save
9709                                 + (input_line_pointer - gotfree_input_line)
9710                                 + adjust);
9711           free (gotfree_input_line);
9712           if (exp->X_op == O_constant
9713               || exp->X_op == O_absent
9714               || exp->X_op == O_illegal
9715               || exp->X_op == O_register
9716               || exp->X_op == O_big)
9717             {
9718               char c = *input_line_pointer;
9719               *input_line_pointer = 0;
9720               as_bad (_("missing or invalid expression `%s'"), save);
9721               *input_line_pointer = c;
9722             }
9723           else if ((got_reloc == BFD_RELOC_386_PLT32
9724                     || got_reloc == BFD_RELOC_X86_64_PLT32)
9725                    && exp->X_op != O_symbol)
9726             {
9727               char c = *input_line_pointer;
9728               *input_line_pointer = 0;
9729               as_bad (_("invalid PLT expression `%s'"), save);
9730               *input_line_pointer = c;
9731             }
9732         }
9733     }
9734   else
9735     expression (exp);
9736
9737   intel_syntax = -intel_syntax;
9738
9739   if (intel_syntax)
9740     i386_intel_simplify (exp);
9741
9742   return got_reloc;
9743 }
9744
9745 static void
9746 signed_cons (int size)
9747 {
9748   if (flag_code == CODE_64BIT)
9749     cons_sign = 1;
9750   cons (size);
9751   cons_sign = -1;
9752 }
9753
#ifdef TE_PE
/* Parse a comma-separated list of expressions and emit each one as a
   4-byte value.  An expression that is a plain symbol is turned into
   an O_secrel expression first.  The rest of the line must be empty.  */
static void
pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
{
  expressionS exp;

  for (;;)
    {
      expression (&exp);
      if (exp.X_op == O_symbol)
        exp.X_op = O_secrel;

      emit_expr (&exp, 4);

      /* Stop on the first character that is not a separator, leaving
         input_line_pointer on it.  */
      if (*input_line_pointer != ',')
        break;
      ++input_line_pointer;
    }

  demand_empty_rest_of_line ();
}
#endif
9774
9775 /* Handle Vector operations.  */
9776
9777 static char *
9778 check_VecOperations (char *op_string, char *op_end)
9779 {
9780   const reg_entry *mask;
9781   const char *saved;
9782   char *end_op;
9783
9784   while (*op_string
9785          && (op_end == NULL || op_string < op_end))
9786     {
9787       saved = op_string;
9788       if (*op_string == '{')
9789         {
9790           op_string++;
9791
9792           /* Check broadcasts.  */
9793           if (strncmp (op_string, "1to", 3) == 0)
9794             {
9795               int bcst_type;
9796
9797               if (i.broadcast)
9798                 goto duplicated_vec_op;
9799
9800               op_string += 3;
9801               if (*op_string == '8')
9802                 bcst_type = 8;
9803               else if (*op_string == '4')
9804                 bcst_type = 4;
9805               else if (*op_string == '2')
9806                 bcst_type = 2;
9807               else if (*op_string == '1'
9808                        && *(op_string+1) == '6')
9809                 {
9810                   bcst_type = 16;
9811                   op_string++;
9812                 }
9813               else
9814                 {
9815                   as_bad (_("Unsupported broadcast: `%s'"), saved);
9816                   return NULL;
9817                 }
9818               op_string++;
9819
9820               broadcast_op.type = bcst_type;
9821               broadcast_op.operand = this_operand;
9822               broadcast_op.bytes = 0;
9823               i.broadcast = &broadcast_op;
9824             }
9825           /* Check masking operation.  */
9826           else if ((mask = parse_register (op_string, &end_op)) != NULL)
9827             {
9828               /* k0 can't be used for write mask.  */
9829               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
9830                 {
9831                   as_bad (_("`%s%s' can't be used for write mask"),
9832                           register_prefix, mask->reg_name);
9833                   return NULL;
9834                 }
9835
9836               if (!i.mask)
9837                 {
9838                   mask_op.mask = mask;
9839                   mask_op.zeroing = 0;
9840                   mask_op.operand = this_operand;
9841                   i.mask = &mask_op;
9842                 }
9843               else
9844                 {
9845                   if (i.mask->mask)
9846                     goto duplicated_vec_op;
9847
9848                   i.mask->mask = mask;
9849
9850                   /* Only "{z}" is allowed here.  No need to check
9851                      zeroing mask explicitly.  */
9852                   if (i.mask->operand != this_operand)
9853                     {
9854                       as_bad (_("invalid write mask `%s'"), saved);
9855                       return NULL;
9856                     }
9857                 }
9858
9859               op_string = end_op;
9860             }
9861           /* Check zeroing-flag for masking operation.  */
9862           else if (*op_string == 'z')
9863             {
9864               if (!i.mask)
9865                 {
9866                   mask_op.mask = NULL;
9867                   mask_op.zeroing = 1;
9868                   mask_op.operand = this_operand;
9869                   i.mask = &mask_op;
9870                 }
9871               else
9872                 {
9873                   if (i.mask->zeroing)
9874                     {
9875                     duplicated_vec_op:
9876                       as_bad (_("duplicated `%s'"), saved);
9877                       return NULL;
9878                     }
9879
9880                   i.mask->zeroing = 1;
9881
9882                   /* Only "{%k}" is allowed here.  No need to check mask
9883                      register explicitly.  */
9884                   if (i.mask->operand != this_operand)
9885                     {
9886                       as_bad (_("invalid zeroing-masking `%s'"),
9887                               saved);
9888                       return NULL;
9889                     }
9890                 }
9891
9892               op_string++;
9893             }
9894           else
9895             goto unknown_vec_op;
9896
9897           if (*op_string != '}')
9898             {
9899               as_bad (_("missing `}' in `%s'"), saved);
9900               return NULL;
9901             }
9902           op_string++;
9903
9904           /* Strip whitespace since the addition of pseudo prefixes
9905              changed how the scrubber treats '{'.  */
9906           if (is_space_char (*op_string))
9907             ++op_string;
9908
9909           continue;
9910         }
9911     unknown_vec_op:
9912       /* We don't know this one.  */
9913       as_bad (_("unknown vector operation: `%s'"), saved);
9914       return NULL;
9915     }
9916
9917   if (i.mask && i.mask->zeroing && !i.mask->mask)
9918     {
9919       as_bad (_("zeroing-masking only allowed with write mask"));
9920       return NULL;
9921     }
9922
9923   return op_string;
9924 }
9925
9926 static int
9927 i386_immediate (char *imm_start)
9928 {
9929   char *save_input_line_pointer;
9930   char *gotfree_input_line;
9931   segT exp_seg = 0;
9932   expressionS *exp;
9933   i386_operand_type types;
9934
9935   operand_type_set (&types, ~0);
9936
9937   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
9938     {
9939       as_bad (_("at most %d immediate operands are allowed"),
9940               MAX_IMMEDIATE_OPERANDS);
9941       return 0;
9942     }
9943
9944   exp = &im_expressions[i.imm_operands++];
9945   i.op[this_operand].imms = exp;
9946
9947   if (is_space_char (*imm_start))
9948     ++imm_start;
9949
9950   save_input_line_pointer = input_line_pointer;
9951   input_line_pointer = imm_start;
9952
9953   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
9954   if (gotfree_input_line)
9955     input_line_pointer = gotfree_input_line;
9956
9957   exp_seg = expression (exp);
9958
9959   SKIP_WHITESPACE ();
9960
9961   /* Handle vector operations.  */
9962   if (*input_line_pointer == '{')
9963     {
9964       input_line_pointer = check_VecOperations (input_line_pointer,
9965                                                 NULL);
9966       if (input_line_pointer == NULL)
9967         return 0;
9968     }
9969
9970   if (*input_line_pointer)
9971     as_bad (_("junk `%s' after expression"), input_line_pointer);
9972
9973   input_line_pointer = save_input_line_pointer;
9974   if (gotfree_input_line)
9975     {
9976       free (gotfree_input_line);
9977
9978       if (exp->X_op == O_constant || exp->X_op == O_register)
9979         exp->X_op = O_illegal;
9980     }
9981
9982   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
9983 }
9984
9985 static int
9986 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
9987                          i386_operand_type types, const char *imm_start)
9988 {
9989   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
9990     {
9991       if (imm_start)
9992         as_bad (_("missing or invalid immediate expression `%s'"),
9993                 imm_start);
9994       return 0;
9995     }
9996   else if (exp->X_op == O_constant)
9997     {
9998       /* Size it properly later.  */
9999       i.types[this_operand].bitfield.imm64 = 1;
10000       /* If not 64bit, sign extend val.  */
10001       if (flag_code != CODE_64BIT
10002           && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
10003         exp->X_add_number
10004           = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
10005     }
10006 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10007   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10008            && exp_seg != absolute_section
10009            && exp_seg != text_section
10010            && exp_seg != data_section
10011            && exp_seg != bss_section
10012            && exp_seg != undefined_section
10013            && !bfd_is_com_section (exp_seg))
10014     {
10015       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10016       return 0;
10017     }
10018 #endif
10019   else if (!intel_syntax && exp_seg == reg_section)
10020     {
10021       if (imm_start)
10022         as_bad (_("illegal immediate register operand %s"), imm_start);
10023       return 0;
10024     }
10025   else
10026     {
10027       /* This is an address.  The size of the address will be
10028          determined later, depending on destination register,
10029          suffix, or the default for the section.  */
10030       i.types[this_operand].bitfield.imm8 = 1;
10031       i.types[this_operand].bitfield.imm16 = 1;
10032       i.types[this_operand].bitfield.imm32 = 1;
10033       i.types[this_operand].bitfield.imm32s = 1;
10034       i.types[this_operand].bitfield.imm64 = 1;
10035       i.types[this_operand] = operand_type_and (i.types[this_operand],
10036                                                 types);
10037     }
10038
10039   return 1;
10040 }
10041
10042 static char *
10043 i386_scale (char *scale)
10044 {
10045   offsetT val;
10046   char *save = input_line_pointer;
10047
10048   input_line_pointer = scale;
10049   val = get_absolute_expression ();
10050
10051   switch (val)
10052     {
10053     case 1:
10054       i.log2_scale_factor = 0;
10055       break;
10056     case 2:
10057       i.log2_scale_factor = 1;
10058       break;
10059     case 4:
10060       i.log2_scale_factor = 2;
10061       break;
10062     case 8:
10063       i.log2_scale_factor = 3;
10064       break;
10065     default:
10066       {
10067         char sep = *input_line_pointer;
10068
10069         *input_line_pointer = '\0';
10070         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10071                 scale);
10072         *input_line_pointer = sep;
10073         input_line_pointer = save;
10074         return NULL;
10075       }
10076     }
10077   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10078     {
10079       as_warn (_("scale factor of %d without an index register"),
10080                1 << i.log2_scale_factor);
10081       i.log2_scale_factor = 0;
10082     }
10083   scale = input_line_pointer;
10084   input_line_pointer = save;
10085   return scale;
10086 }
10087
10088 static int
10089 i386_displacement (char *disp_start, char *disp_end)
10090 {
10091   expressionS *exp;
10092   segT exp_seg = 0;
10093   char *save_input_line_pointer;
10094   char *gotfree_input_line;
10095   int override;
10096   i386_operand_type bigdisp, types = anydisp;
10097   int ret;
10098
10099   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10100     {
10101       as_bad (_("at most %d displacement operands are allowed"),
10102               MAX_MEMORY_OPERANDS);
10103       return 0;
10104     }
10105
10106   operand_type_set (&bigdisp, 0);
10107   if (i.jumpabsolute
10108       || i.types[this_operand].bitfield.baseindex
10109       || (current_templates->start->opcode_modifier.jump != JUMP
10110           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10111     {
10112       i386_addressing_mode ();
10113       override = (i.prefix[ADDR_PREFIX] != 0);
10114       if (flag_code == CODE_64BIT)
10115         {
10116           if (!override)
10117             {
10118               bigdisp.bitfield.disp32s = 1;
10119               bigdisp.bitfield.disp64 = 1;
10120             }
10121           else
10122             bigdisp.bitfield.disp32 = 1;
10123         }
10124       else if ((flag_code == CODE_16BIT) ^ override)
10125           bigdisp.bitfield.disp16 = 1;
10126       else
10127           bigdisp.bitfield.disp32 = 1;
10128     }
10129   else
10130     {
10131       /* For PC-relative branches, the width of the displacement may be
10132          dependent upon data size, but is never dependent upon address size.
10133          Also make sure to not unintentionally match against a non-PC-relative
10134          branch template.  */
10135       static templates aux_templates;
10136       const insn_template *t = current_templates->start;
10137       bfd_boolean has_intel64 = FALSE;
10138
10139       aux_templates.start = t;
10140       while (++t < current_templates->end)
10141         {
10142           if (t->opcode_modifier.jump
10143               != current_templates->start->opcode_modifier.jump)
10144             break;
10145           if ((t->opcode_modifier.isa64 >= INTEL64))
10146             has_intel64 = TRUE;
10147         }
10148       if (t < current_templates->end)
10149         {
10150           aux_templates.end = t;
10151           current_templates = &aux_templates;
10152         }
10153
10154       override = (i.prefix[DATA_PREFIX] != 0);
10155       if (flag_code == CODE_64BIT)
10156         {
10157           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10158               && (!intel64 || !has_intel64))
10159             bigdisp.bitfield.disp16 = 1;
10160           else
10161             bigdisp.bitfield.disp32s = 1;
10162         }
10163       else
10164         {
10165           if (!override)
10166             override = (i.suffix == (flag_code != CODE_16BIT
10167                                      ? WORD_MNEM_SUFFIX
10168                                      : LONG_MNEM_SUFFIX));
10169           bigdisp.bitfield.disp32 = 1;
10170           if ((flag_code == CODE_16BIT) ^ override)
10171             {
10172               bigdisp.bitfield.disp32 = 0;
10173               bigdisp.bitfield.disp16 = 1;
10174             }
10175         }
10176     }
10177   i.types[this_operand] = operand_type_or (i.types[this_operand],
10178                                            bigdisp);
10179
10180   exp = &disp_expressions[i.disp_operands];
10181   i.op[this_operand].disps = exp;
10182   i.disp_operands++;
10183   save_input_line_pointer = input_line_pointer;
10184   input_line_pointer = disp_start;
10185   END_STRING_AND_SAVE (disp_end);
10186
10187 #ifndef GCC_ASM_O_HACK
10188 #define GCC_ASM_O_HACK 0
10189 #endif
10190 #if GCC_ASM_O_HACK
10191   END_STRING_AND_SAVE (disp_end + 1);
10192   if (i.types[this_operand].bitfield.baseIndex
10193       && displacement_string_end[-1] == '+')
10194     {
10195       /* This hack is to avoid a warning when using the "o"
10196          constraint within gcc asm statements.
10197          For instance:
10198
10199          #define _set_tssldt_desc(n,addr,limit,type) \
10200          __asm__ __volatile__ ( \
10201          "movw %w2,%0\n\t" \
10202          "movw %w1,2+%0\n\t" \
10203          "rorl $16,%1\n\t" \
10204          "movb %b1,4+%0\n\t" \
10205          "movb %4,5+%0\n\t" \
10206          "movb $0,6+%0\n\t" \
10207          "movb %h1,7+%0\n\t" \
10208          "rorl $16,%1" \
10209          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
10210
10211          This works great except that the output assembler ends
10212          up looking a bit weird if it turns out that there is
10213          no offset.  You end up producing code that looks like:
10214
10215          #APP
10216          movw $235,(%eax)
10217          movw %dx,2+(%eax)
10218          rorl $16,%edx
10219          movb %dl,4+(%eax)
10220          movb $137,5+(%eax)
10221          movb $0,6+(%eax)
10222          movb %dh,7+(%eax)
10223          rorl $16,%edx
10224          #NO_APP
10225
10226          So here we provide the missing zero.  */
10227
10228       *displacement_string_end = '0';
10229     }
10230 #endif
10231   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10232   if (gotfree_input_line)
10233     input_line_pointer = gotfree_input_line;
10234
10235   exp_seg = expression (exp);
10236
10237   SKIP_WHITESPACE ();
10238   if (*input_line_pointer)
10239     as_bad (_("junk `%s' after expression"), input_line_pointer);
10240 #if GCC_ASM_O_HACK
10241   RESTORE_END_STRING (disp_end + 1);
10242 #endif
10243   input_line_pointer = save_input_line_pointer;
10244   if (gotfree_input_line)
10245     {
10246       free (gotfree_input_line);
10247
10248       if (exp->X_op == O_constant || exp->X_op == O_register)
10249         exp->X_op = O_illegal;
10250     }
10251
10252   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
10253
10254   RESTORE_END_STRING (disp_end);
10255
10256   return ret;
10257 }
10258
10259 static int
10260 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10261                             i386_operand_type types, const char *disp_start)
10262 {
10263   i386_operand_type bigdisp;
10264   int ret = 1;
10265
10266   /* We do this to make sure that the section symbol is in
10267      the symbol table.  We will ultimately change the relocation
10268      to be relative to the beginning of the section.  */
10269   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10270       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10271       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10272     {
10273       if (exp->X_op != O_symbol)
10274         goto inv_disp;
10275
10276       if (S_IS_LOCAL (exp->X_add_symbol)
10277           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
10278           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
10279         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
10280       exp->X_op = O_subtract;
10281       exp->X_op_symbol = GOT_symbol;
10282       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
10283         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
10284       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10285         i.reloc[this_operand] = BFD_RELOC_64;
10286       else
10287         i.reloc[this_operand] = BFD_RELOC_32;
10288     }
10289
10290   else if (exp->X_op == O_absent
10291            || exp->X_op == O_illegal
10292            || exp->X_op == O_big)
10293     {
10294     inv_disp:
10295       as_bad (_("missing or invalid displacement expression `%s'"),
10296               disp_start);
10297       ret = 0;
10298     }
10299
10300   else if (flag_code == CODE_64BIT
10301            && !i.prefix[ADDR_PREFIX]
10302            && exp->X_op == O_constant)
10303     {
10304       /* Since displacement is signed extended to 64bit, don't allow
10305          disp32 and turn off disp32s if they are out of range.  */
10306       i.types[this_operand].bitfield.disp32 = 0;
10307       if (!fits_in_signed_long (exp->X_add_number))
10308         {
10309           i.types[this_operand].bitfield.disp32s = 0;
10310           if (i.types[this_operand].bitfield.baseindex)
10311             {
10312               as_bad (_("0x%lx out range of signed 32bit displacement"),
10313                       (long) exp->X_add_number);
10314               ret = 0;
10315             }
10316         }
10317     }
10318
10319 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10320   else if (exp->X_op != O_constant
10321            && OUTPUT_FLAVOR == bfd_target_aout_flavour
10322            && exp_seg != absolute_section
10323            && exp_seg != text_section
10324            && exp_seg != data_section
10325            && exp_seg != bss_section
10326            && exp_seg != undefined_section
10327            && !bfd_is_com_section (exp_seg))
10328     {
10329       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10330       ret = 0;
10331     }
10332 #endif
10333
10334   if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
10335       /* Constants get taken care of by optimize_disp().  */
10336       && exp->X_op != O_constant)
10337     i.types[this_operand].bitfield.disp8 = 1;
10338
10339   /* Check if this is a displacement only operand.  */
10340   bigdisp = i.types[this_operand];
10341   bigdisp.bitfield.disp8 = 0;
10342   bigdisp.bitfield.disp16 = 0;
10343   bigdisp.bitfield.disp32 = 0;
10344   bigdisp.bitfield.disp32s = 0;
10345   bigdisp.bitfield.disp64 = 0;
10346   if (operand_type_all_zero (&bigdisp))
10347     i.types[this_operand] = operand_type_and (i.types[this_operand],
10348                                               types);
10349
10350   return ret;
10351 }
10352
10353 /* Return the active addressing mode, taking address override and
10354    registers forming the address into consideration.  Update the
10355    address override prefix if necessary.  */
10356
10357 static enum flag_code
10358 i386_addressing_mode (void)
10359 {
10360   enum flag_code addr_mode;
10361
10362   if (i.prefix[ADDR_PREFIX])
10363     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
10364   else if (flag_code == CODE_16BIT
10365            && current_templates->start->cpu_flags.bitfield.cpumpx
10366            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
10367               from md_assemble() by "is not a valid base/index expression"
10368               when there is a base and/or index.  */
10369            && !i.types[this_operand].bitfield.baseindex)
10370     {
10371       /* MPX insn memory operands with neither base nor index must be forced
10372          to use 32-bit addressing in 16-bit mode.  */
10373       addr_mode = CODE_32BIT;
10374       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10375       ++i.prefixes;
10376       gas_assert (!i.types[this_operand].bitfield.disp16);
10377       gas_assert (!i.types[this_operand].bitfield.disp32);
10378     }
10379   else
10380     {
10381       addr_mode = flag_code;
10382
10383 #if INFER_ADDR_PREFIX
10384       if (i.mem_operands == 0)
10385         {
10386           /* Infer address prefix from the first memory operand.  */
10387           const reg_entry *addr_reg = i.base_reg;
10388
10389           if (addr_reg == NULL)
10390             addr_reg = i.index_reg;
10391
10392           if (addr_reg)
10393             {
10394               if (addr_reg->reg_type.bitfield.dword)
10395                 addr_mode = CODE_32BIT;
10396               else if (flag_code != CODE_64BIT
10397                        && addr_reg->reg_type.bitfield.word)
10398                 addr_mode = CODE_16BIT;
10399
10400               if (addr_mode != flag_code)
10401                 {
10402                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10403                   i.prefixes += 1;
10404                   /* Change the size of any displacement too.  At most one
10405                      of Disp16 or Disp32 is set.
10406                      FIXME.  There doesn't seem to be any real need for
10407                      separate Disp16 and Disp32 flags.  The same goes for
10408                      Imm16 and Imm32.  Removing them would probably clean
10409                      up the code quite a lot.  */
10410                   if (flag_code != CODE_64BIT
10411                       && (i.types[this_operand].bitfield.disp16
10412                           || i.types[this_operand].bitfield.disp32))
10413                     i.types[this_operand]
10414                       = operand_type_xor (i.types[this_operand], disp16_32);
10415                 }
10416             }
10417         }
10418 #endif
10419     }
10420
10421   return addr_mode;
10422 }
10423
10424 /* Make sure the memory operand we've been dealt is valid.
10425    Return 1 on success, 0 on a failure.  */
10426
10427 static int
10428 i386_index_check (const char *operand_string)
10429 {
10430   const char *kind = "base/index";
10431   enum flag_code addr_mode = i386_addressing_mode ();
10432
10433   if (current_templates->start->opcode_modifier.isstring
10434       && !current_templates->start->cpu_flags.bitfield.cpupadlock
10435       && (current_templates->end[-1].opcode_modifier.isstring
10436           || i.mem_operands))
10437     {
10438       /* Memory operands of string insns are special in that they only allow
10439          a single register (rDI, rSI, or rBX) as their memory address.  */
10440       const reg_entry *expected_reg;
10441       static const char *di_si[][2] =
10442         {
10443           { "esi", "edi" },
10444           { "si", "di" },
10445           { "rsi", "rdi" }
10446         };
10447       static const char *bx[] = { "ebx", "bx", "rbx" };
10448
10449       kind = "string address";
10450
10451       if (current_templates->start->opcode_modifier.repprefixok)
10452         {
10453           int es_op = current_templates->end[-1].opcode_modifier.isstring
10454                       - IS_STRING_ES_OP0;
10455           int op = 0;
10456
10457           if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
10458               || ((!i.mem_operands != !intel_syntax)
10459                   && current_templates->end[-1].operand_types[1]
10460                      .bitfield.baseindex))
10461             op = 1;
10462           expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]);
10463         }
10464       else
10465         expected_reg = hash_find (reg_hash, bx[addr_mode]);
10466
10467       if (i.base_reg != expected_reg
10468           || i.index_reg
10469           || operand_type_check (i.types[this_operand], disp))
10470         {
10471           /* The second memory operand must have the same size as
10472              the first one.  */
10473           if (i.mem_operands
10474               && i.base_reg
10475               && !((addr_mode == CODE_64BIT
10476                     && i.base_reg->reg_type.bitfield.qword)
10477                    || (addr_mode == CODE_32BIT
10478                        ? i.base_reg->reg_type.bitfield.dword
10479                        : i.base_reg->reg_type.bitfield.word)))
10480             goto bad_address;
10481
10482           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
10483                    operand_string,
10484                    intel_syntax ? '[' : '(',
10485                    register_prefix,
10486                    expected_reg->reg_name,
10487                    intel_syntax ? ']' : ')');
10488           return 1;
10489         }
10490       else
10491         return 1;
10492
10493     bad_address:
10494       as_bad (_("`%s' is not a valid %s expression"),
10495               operand_string, kind);
10496       return 0;
10497     }
10498   else
10499     {
10500       if (addr_mode != CODE_16BIT)
10501         {
10502           /* 32-bit/64-bit checks.  */
10503           if ((i.base_reg
10504                && ((addr_mode == CODE_64BIT
10505                     ? !i.base_reg->reg_type.bitfield.qword
10506                     : !i.base_reg->reg_type.bitfield.dword)
10507                    || (i.index_reg && i.base_reg->reg_num == RegIP)
10508                    || i.base_reg->reg_num == RegIZ))
10509               || (i.index_reg
10510                   && !i.index_reg->reg_type.bitfield.xmmword
10511                   && !i.index_reg->reg_type.bitfield.ymmword
10512                   && !i.index_reg->reg_type.bitfield.zmmword
10513                   && ((addr_mode == CODE_64BIT
10514                        ? !i.index_reg->reg_type.bitfield.qword
10515                        : !i.index_reg->reg_type.bitfield.dword)
10516                       || !i.index_reg->reg_type.bitfield.baseindex)))
10517             goto bad_address;
10518
10519           /* bndmk, bndldx, and bndstx have special restrictions. */
10520           if (current_templates->start->base_opcode == 0xf30f1b
10521               || (current_templates->start->base_opcode & ~1) == 0x0f1a)
10522             {
10523               /* They cannot use RIP-relative addressing. */
10524               if (i.base_reg && i.base_reg->reg_num == RegIP)
10525                 {
10526                   as_bad (_("`%s' cannot be used here"), operand_string);
10527                   return 0;
10528                 }
10529
10530               /* bndldx and bndstx ignore their scale factor. */
10531               if (current_templates->start->base_opcode != 0xf30f1b
10532                   && i.log2_scale_factor)
10533                 as_warn (_("register scaling is being ignored here"));
10534             }
10535         }
10536       else
10537         {
10538           /* 16-bit checks.  */
10539           if ((i.base_reg
10540                && (!i.base_reg->reg_type.bitfield.word
10541                    || !i.base_reg->reg_type.bitfield.baseindex))
10542               || (i.index_reg
10543                   && (!i.index_reg->reg_type.bitfield.word
10544                       || !i.index_reg->reg_type.bitfield.baseindex
10545                       || !(i.base_reg
10546                            && i.base_reg->reg_num < 6
10547                            && i.index_reg->reg_num >= 6
10548                            && i.log2_scale_factor == 0))))
10549             goto bad_address;
10550         }
10551     }
10552   return 1;
10553 }
10554
10555 /* Handle vector immediates.  */
10556
10557 static int
10558 RC_SAE_immediate (const char *imm_start)
10559 {
10560   unsigned int match_found, j;
10561   const char *pstr = imm_start;
10562   expressionS *exp;
10563
10564   if (*pstr != '{')
10565     return 0;
10566
10567   pstr++;
10568   match_found = 0;
10569   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10570     {
10571       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10572         {
10573           if (!i.rounding)
10574             {
10575               rc_op.type = RC_NamesTable[j].type;
10576               rc_op.operand = this_operand;
10577               i.rounding = &rc_op;
10578             }
10579           else
10580             {
10581               as_bad (_("duplicated `%s'"), imm_start);
10582               return 0;
10583             }
10584           pstr += RC_NamesTable[j].len;
10585           match_found = 1;
10586           break;
10587         }
10588     }
10589   if (!match_found)
10590     return 0;
10591
10592   if (*pstr++ != '}')
10593     {
10594       as_bad (_("Missing '}': '%s'"), imm_start);
10595       return 0;
10596     }
10597   /* RC/SAE immediate string should contain nothing more.  */;
10598   if (*pstr != 0)
10599     {
10600       as_bad (_("Junk after '}': '%s'"), imm_start);
10601       return 0;
10602     }
10603
10604   exp = &im_expressions[i.imm_operands++];
10605   i.op[this_operand].imms = exp;
10606
10607   exp->X_op = O_constant;
10608   exp->X_add_number = 0;
10609   exp->X_add_symbol = (symbolS *) 0;
10610   exp->X_op_symbol = (symbolS *) 0;
10611
10612   i.types[this_operand].bitfield.imm8 = 1;
10613   return 1;
10614 }
10615
10616 /* Only string instructions can have a second memory operand, so
10617    reduce current_templates to just those if it contains any.  */
10618 static int
10619 maybe_adjust_templates (void)
10620 {
10621   const insn_template *t;
10622
10623   gas_assert (i.mem_operands == 1);
10624
10625   for (t = current_templates->start; t < current_templates->end; ++t)
10626     if (t->opcode_modifier.isstring)
10627       break;
10628
10629   if (t < current_templates->end)
10630     {
10631       static templates aux_templates;
10632       bfd_boolean recheck;
10633
10634       aux_templates.start = t;
10635       for (; t < current_templates->end; ++t)
10636         if (!t->opcode_modifier.isstring)
10637           break;
10638       aux_templates.end = t;
10639
10640       /* Determine whether to re-check the first memory operand.  */
10641       recheck = (aux_templates.start != current_templates->start
10642                  || t != current_templates->end);
10643
10644       current_templates = &aux_templates;
10645
10646       if (recheck)
10647         {
10648           i.mem_operands = 0;
10649           if (i.memop1_string != NULL
10650               && i386_index_check (i.memop1_string) == 0)
10651             return 0;
10652           i.mem_operands = 1;
10653         }
10654     }
10655
10656   return 1;
10657 }
10658
10659 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
10660    on error.  */
10661
10662 static int
10663 i386_att_operand (char *operand_string)
10664 {
10665   const reg_entry *r;
10666   char *end_op;
10667   char *op_string = operand_string;
10668
10669   if (is_space_char (*op_string))
10670     ++op_string;
10671
10672   /* We check for an absolute prefix (differentiating,
10673      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
10674   if (*op_string == ABSOLUTE_PREFIX)
10675     {
10676       ++op_string;
10677       if (is_space_char (*op_string))
10678         ++op_string;
10679       i.jumpabsolute = TRUE;
10680     }
10681
10682   /* Check if operand is a register.  */
10683   if ((r = parse_register (op_string, &end_op)) != NULL)
10684     {
10685       i386_operand_type temp;
10686
10687       /* Check for a segment override by searching for ':' after a
10688          segment register.  */
10689       op_string = end_op;
10690       if (is_space_char (*op_string))
10691         ++op_string;
10692       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
10693         {
10694           switch (r->reg_num)
10695             {
10696             case 0:
10697               i.seg[i.mem_operands] = &es;
10698               break;
10699             case 1:
10700               i.seg[i.mem_operands] = &cs;
10701               break;
10702             case 2:
10703               i.seg[i.mem_operands] = &ss;
10704               break;
10705             case 3:
10706               i.seg[i.mem_operands] = &ds;
10707               break;
10708             case 4:
10709               i.seg[i.mem_operands] = &fs;
10710               break;
10711             case 5:
10712               i.seg[i.mem_operands] = &gs;
10713               break;
10714             }
10715
10716           /* Skip the ':' and whitespace.  */
10717           ++op_string;
10718           if (is_space_char (*op_string))
10719             ++op_string;
10720
10721           if (!is_digit_char (*op_string)
10722               && !is_identifier_char (*op_string)
10723               && *op_string != '('
10724               && *op_string != ABSOLUTE_PREFIX)
10725             {
10726               as_bad (_("bad memory operand `%s'"), op_string);
10727               return 0;
10728             }
10729           /* Handle case of %es:*foo.  */
10730           if (*op_string == ABSOLUTE_PREFIX)
10731             {
10732               ++op_string;
10733               if (is_space_char (*op_string))
10734                 ++op_string;
10735               i.jumpabsolute = TRUE;
10736             }
10737           goto do_memory_reference;
10738         }
10739
10740       /* Handle vector operations.  */
10741       if (*op_string == '{')
10742         {
10743           op_string = check_VecOperations (op_string, NULL);
10744           if (op_string == NULL)
10745             return 0;
10746         }
10747
10748       if (*op_string)
10749         {
10750           as_bad (_("junk `%s' after register"), op_string);
10751           return 0;
10752         }
10753       temp = r->reg_type;
10754       temp.bitfield.baseindex = 0;
10755       i.types[this_operand] = operand_type_or (i.types[this_operand],
10756                                                temp);
10757       i.types[this_operand].bitfield.unspecified = 0;
10758       i.op[this_operand].regs = r;
10759       i.reg_operands++;
10760     }
10761   else if (*op_string == REGISTER_PREFIX)
10762     {
10763       as_bad (_("bad register name `%s'"), op_string);
10764       return 0;
10765     }
10766   else if (*op_string == IMMEDIATE_PREFIX)
10767     {
10768       ++op_string;
10769       if (i.jumpabsolute)
10770         {
10771           as_bad (_("immediate operand illegal with absolute jump"));
10772           return 0;
10773         }
10774       if (!i386_immediate (op_string))
10775         return 0;
10776     }
10777   else if (RC_SAE_immediate (operand_string))
10778     {
10779       /* If it is a RC or SAE immediate, do nothing.  */
10780       ;
10781     }
10782   else if (is_digit_char (*op_string)
10783            || is_identifier_char (*op_string)
10784            || *op_string == '"'
10785            || *op_string == '(')
10786     {
10787       /* This is a memory reference of some sort.  */
10788       char *base_string;
10789
10790       /* Start and end of displacement string expression (if found).  */
10791       char *displacement_string_start;
10792       char *displacement_string_end;
10793       char *vop_start;
10794
10795     do_memory_reference:
10796       if (i.mem_operands == 1 && !maybe_adjust_templates ())
10797         return 0;
10798       if ((i.mem_operands == 1
10799            && !current_templates->start->opcode_modifier.isstring)
10800           || i.mem_operands == 2)
10801         {
10802           as_bad (_("too many memory references for `%s'"),
10803                   current_templates->start->name);
10804           return 0;
10805         }
10806
10807       /* Check for base index form.  We detect the base index form by
10808          looking for an ')' at the end of the operand, searching
10809          for the '(' matching it, and finding a REGISTER_PREFIX or ','
10810          after the '('.  */
10811       base_string = op_string + strlen (op_string);
10812
10813       /* Handle vector operations.  */
10814       vop_start = strchr (op_string, '{');
10815       if (vop_start && vop_start < base_string)
10816         {
10817           if (check_VecOperations (vop_start, base_string) == NULL)
10818             return 0;
10819           base_string = vop_start;
10820         }
10821
10822       --base_string;
10823       if (is_space_char (*base_string))
10824         --base_string;
10825
10826       /* If we only have a displacement, set-up for it to be parsed later.  */
10827       displacement_string_start = op_string;
10828       displacement_string_end = base_string + 1;
10829
10830       if (*base_string == ')')
10831         {
10832           char *temp_string;
10833           unsigned int parens_balanced = 1;
10834           /* We've already checked that the number of left & right ()'s are
10835              equal, so this loop will not be infinite.  */
10836           do
10837             {
10838               base_string--;
10839               if (*base_string == ')')
10840                 parens_balanced++;
10841               if (*base_string == '(')
10842                 parens_balanced--;
10843             }
10844           while (parens_balanced);
10845
10846           temp_string = base_string;
10847
10848           /* Skip past '(' and whitespace.  */
10849           ++base_string;
10850           if (is_space_char (*base_string))
10851             ++base_string;
10852
10853           if (*base_string == ','
10854               || ((i.base_reg = parse_register (base_string, &end_op))
10855                   != NULL))
10856             {
10857               displacement_string_end = temp_string;
10858
10859               i.types[this_operand].bitfield.baseindex = 1;
10860
10861               if (i.base_reg)
10862                 {
10863                   base_string = end_op;
10864                   if (is_space_char (*base_string))
10865                     ++base_string;
10866                 }
10867
10868               /* There may be an index reg or scale factor here.  */
10869               if (*base_string == ',')
10870                 {
10871                   ++base_string;
10872                   if (is_space_char (*base_string))
10873                     ++base_string;
10874
10875                   if ((i.index_reg = parse_register (base_string, &end_op))
10876                       != NULL)
10877                     {
10878                       base_string = end_op;
10879                       if (is_space_char (*base_string))
10880                         ++base_string;
10881                       if (*base_string == ',')
10882                         {
10883                           ++base_string;
10884                           if (is_space_char (*base_string))
10885                             ++base_string;
10886                         }
10887                       else if (*base_string != ')')
10888                         {
10889                           as_bad (_("expecting `,' or `)' "
10890                                     "after index register in `%s'"),
10891                                   operand_string);
10892                           return 0;
10893                         }
10894                     }
10895                   else if (*base_string == REGISTER_PREFIX)
10896                     {
10897                       end_op = strchr (base_string, ',');
10898                       if (end_op)
10899                         *end_op = '\0';
10900                       as_bad (_("bad register name `%s'"), base_string);
10901                       return 0;
10902                     }
10903
10904                   /* Check for scale factor.  */
10905                   if (*base_string != ')')
10906                     {
10907                       char *end_scale = i386_scale (base_string);
10908
10909                       if (!end_scale)
10910                         return 0;
10911
10912                       base_string = end_scale;
10913                       if (is_space_char (*base_string))
10914                         ++base_string;
10915                       if (*base_string != ')')
10916                         {
10917                           as_bad (_("expecting `)' "
10918                                     "after scale factor in `%s'"),
10919                                   operand_string);
10920                           return 0;
10921                         }
10922                     }
10923                   else if (!i.index_reg)
10924                     {
10925                       as_bad (_("expecting index register or scale factor "
10926                                 "after `,'; got '%c'"),
10927                               *base_string);
10928                       return 0;
10929                     }
10930                 }
10931               else if (*base_string != ')')
10932                 {
10933                   as_bad (_("expecting `,' or `)' "
10934                             "after base register in `%s'"),
10935                           operand_string);
10936                   return 0;
10937                 }
10938             }
10939           else if (*base_string == REGISTER_PREFIX)
10940             {
10941               end_op = strchr (base_string, ',');
10942               if (end_op)
10943                 *end_op = '\0';
10944               as_bad (_("bad register name `%s'"), base_string);
10945               return 0;
10946             }
10947         }
10948
10949       /* If there's an expression beginning the operand, parse it,
10950          assuming displacement_string_start and
10951          displacement_string_end are meaningful.  */
10952       if (displacement_string_start != displacement_string_end)
10953         {
10954           if (!i386_displacement (displacement_string_start,
10955                                   displacement_string_end))
10956             return 0;
10957         }
10958
10959       /* Special case for (%dx) while doing input/output op.  */
10960       if (i.base_reg
10961           && i.base_reg->reg_type.bitfield.instance == RegD
10962           && i.base_reg->reg_type.bitfield.word
10963           && i.index_reg == 0
10964           && i.log2_scale_factor == 0
10965           && i.seg[i.mem_operands] == 0
10966           && !operand_type_check (i.types[this_operand], disp))
10967         {
10968           i.types[this_operand] = i.base_reg->reg_type;
10969           return 1;
10970         }
10971
10972       if (i386_index_check (operand_string) == 0)
10973         return 0;
10974       i.flags[this_operand] |= Operand_Mem;
10975       if (i.mem_operands == 0)
10976         i.memop1_string = xstrdup (operand_string);
10977       i.mem_operands++;
10978     }
10979   else
10980     {
10981       /* It's not a memory operand; argh!  */
10982       as_bad (_("invalid char %s beginning operand %d `%s'"),
10983               output_invalid (*op_string),
10984               this_operand + 1,
10985               op_string);
10986       return 0;
10987     }
10988   return 1;                     /* Normal return.  */
10989 }
10990 \f
10991 /* Calculate the maximum variable size (i.e., excluding fr_fix)
10992    that an rs_machine_dependent frag may reach.  */
10993
10994 unsigned int
10995 i386_frag_max_var (fragS *frag)
10996 {
10997   /* The only relaxable frags are for jumps.
10998      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
10999   gas_assert (frag->fr_type == rs_machine_dependent);
11000   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11001 }
11002
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Return non-zero if a reference to FR_SYMBOL can be resolved by the
   assembler, zero if it has to be left for later resolution.  FR_VAR
   is either NO_RELOC or the relocation attached to the reference;
   only PLT32 relocations are expected here.  */
static int
elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
{
  int is_ifunc = (symbol_get_bfdsym (fr_symbol)->flags
                  & BSF_GNU_INDIRECT_FUNCTION) != 0;

  /* STT_GNU_IFUNC symbol must go through PLT.  */
  if (is_ifunc)
    return 0;

  /* A non-external symbol is either weak or local; only the latter
     counts as resolved.  */
  if (!S_IS_EXTERNAL (fr_symbol))
    return !S_IS_WEAK (fr_symbol);

  /* Global symbols with non-default visibility can't be preempted.  */
  if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
    return 1;

  if (fr_var != NO_RELOC)
    {
      enum bfd_reloc_code_real reloc = (enum bfd_reloc_code_real) fr_var;

      /* No relocation other than PLT32 is expected.  */
      if (reloc != BFD_RELOC_386_PLT32
          && reloc != BFD_RELOC_X86_64_PLT32)
        abort ();

      /* Symbol with PLT relocation may be preempted.  */
      return 0;
    }

  /* Global symbols with default visibility in a shared library may be
     preempted by another definition.  */
  return !shared;
}
#endif
11036
11037 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11038    Note also work for Skylake and Cascadelake.
11039 ---------------------------------------------------------------------
11040 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11041 | ------  | ----------- | ------- | -------- |
11042 |   Jo    |      N      |    N    |     Y    |
11043 |   Jno   |      N      |    N    |     Y    |
11044 |  Jc/Jb  |      Y      |    N    |     Y    |
11045 | Jae/Jnb |      Y      |    N    |     Y    |
11046 |  Je/Jz  |      Y      |    Y    |     Y    |
11047 | Jne/Jnz |      Y      |    Y    |     Y    |
11048 | Jna/Jbe |      Y      |    N    |     Y    |
11049 | Ja/Jnbe |      Y      |    N    |     Y    |
11050 |   Js    |      N      |    N    |     Y    |
11051 |   Jns   |      N      |    N    |     Y    |
11052 |  Jp/Jpe |      N      |    N    |     Y    |
11053 | Jnp/Jpo |      N      |    N    |     Y    |
11054 | Jl/Jnge |      Y      |    Y    |     Y    |
11055 | Jge/Jnl |      Y      |    Y    |     Y    |
11056 | Jle/Jng |      Y      |    Y    |     Y    |
11057 | Jg/Jnle |      Y      |    Y    |     Y    |
11058 ---------------------------------------------------------------------  */
11059 static int
11060 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11061 {
11062   if (mf_cmp == mf_cmp_alu_cmp)
11063     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11064             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11065   if (mf_cmp == mf_cmp_incdec)
11066     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11067             || mf_jcc == mf_jcc_jle);
11068   if (mf_cmp == mf_cmp_test_and)
11069     return 1;
11070   return 0;
11071 }
11072
11073 /* Return the next non-empty frag.  */
11074
11075 static fragS *
11076 i386_next_non_empty_frag (fragS *fragP)
11077 {
11078   /* There may be a frag with a ".fill 0" when there is no room in
11079      the current frag for frag_grow in output_insn.  */
11080   for (fragP = fragP->fr_next;
11081        (fragP != NULL
11082         && fragP->fr_type == rs_fill
11083         && fragP->fr_fix == 0);
11084        fragP = fragP->fr_next)
11085     ;
11086   return fragP;
11087 }
11088
11089 /* Return the next jcc frag after BRANCH_PADDING.  */
11090
11091 static fragS *
11092 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11093 {
11094   fragS *branch_fragP;
11095   if (!pad_fragP)
11096     return NULL;
11097
11098   if (pad_fragP->fr_type == rs_machine_dependent
11099       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11100           == BRANCH_PADDING))
11101     {
11102       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11103       if (branch_fragP->fr_type != rs_machine_dependent)
11104         return NULL;
11105       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11106           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11107                                    pad_fragP->tc_frag_data.mf_type))
11108         return branch_fragP;
11109     }
11110
11111   return NULL;
11112 }
11113
11114 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11115
11116 static void
11117 i386_classify_machine_dependent_frag (fragS *fragP)
11118 {
11119   fragS *cmp_fragP;
11120   fragS *pad_fragP;
11121   fragS *branch_fragP;
11122   fragS *next_fragP;
11123   unsigned int max_prefix_length;
11124
11125   if (fragP->tc_frag_data.classified)
11126     return;
11127
11128   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11129      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11130   for (next_fragP = fragP;
11131        next_fragP != NULL;
11132        next_fragP = next_fragP->fr_next)
11133     {
11134       next_fragP->tc_frag_data.classified = 1;
11135       if (next_fragP->fr_type == rs_machine_dependent)
11136         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11137           {
11138           case BRANCH_PADDING:
11139             /* The BRANCH_PADDING frag must be followed by a branch
11140                frag.  */
11141             branch_fragP = i386_next_non_empty_frag (next_fragP);
11142             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11143             break;
11144           case FUSED_JCC_PADDING:
11145             /* Check if this is a fused jcc:
11146                FUSED_JCC_PADDING
11147                CMP like instruction
11148                BRANCH_PADDING
11149                COND_JUMP
11150                */
11151             cmp_fragP = i386_next_non_empty_frag (next_fragP);
11152             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11153             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11154             if (branch_fragP)
11155               {
11156                 /* The BRANCH_PADDING frag is merged with the
11157                    FUSED_JCC_PADDING frag.  */
11158                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11159                 /* CMP like instruction size.  */
11160                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11161                 frag_wane (pad_fragP);
11162                 /* Skip to branch_fragP.  */
11163                 next_fragP = branch_fragP;
11164               }
11165             else if (next_fragP->tc_frag_data.max_prefix_length)
11166               {
11167                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11168                    a fused jcc.  */
11169                 next_fragP->fr_subtype
11170                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11171                 next_fragP->tc_frag_data.max_bytes
11172                   = next_fragP->tc_frag_data.max_prefix_length;
11173                 /* This will be updated in the BRANCH_PREFIX scan.  */
11174                 next_fragP->tc_frag_data.max_prefix_length = 0;
11175               }
11176             else
11177               frag_wane (next_fragP);
11178             break;
11179           }
11180     }
11181
11182   /* Stop if there is no BRANCH_PREFIX.  */
11183   if (!align_branch_prefix_size)
11184     return;
11185
11186   /* Scan for BRANCH_PREFIX.  */
11187   for (; fragP != NULL; fragP = fragP->fr_next)
11188     {
11189       if (fragP->fr_type != rs_machine_dependent
11190           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11191               != BRANCH_PREFIX))
11192         continue;
11193
11194       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
11195          COND_JUMP_PREFIX.  */
11196       max_prefix_length = 0;
11197       for (next_fragP = fragP;
11198            next_fragP != NULL;
11199            next_fragP = next_fragP->fr_next)
11200         {
11201           if (next_fragP->fr_type == rs_fill)
11202             /* Skip rs_fill frags.  */
11203             continue;
11204           else if (next_fragP->fr_type != rs_machine_dependent)
11205             /* Stop for all other frags.  */
11206             break;
11207
11208           /* rs_machine_dependent frags.  */
11209           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11210               == BRANCH_PREFIX)
11211             {
11212               /* Count BRANCH_PREFIX frags.  */
11213               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
11214                 {
11215                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
11216                   frag_wane (next_fragP);
11217                 }
11218               else
11219                 max_prefix_length
11220                   += next_fragP->tc_frag_data.max_bytes;
11221             }
11222           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11223                     == BRANCH_PADDING)
11224                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11225                        == FUSED_JCC_PADDING))
11226             {
11227               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
11228               fragP->tc_frag_data.u.padding_fragP = next_fragP;
11229               break;
11230             }
11231           else
11232             /* Stop for other rs_machine_dependent frags.  */
11233             break;
11234         }
11235
11236       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11237
11238       /* Skip to the next frag.  */
11239       fragP = next_fragP;
11240     }
11241 }
11242
11243 /* Compute padding size for
11244
11245         FUSED_JCC_PADDING
11246         CMP like instruction
11247         BRANCH_PADDING
11248         COND_JUMP/UNCOND_JUMP
11249
11250    or
11251
11252         BRANCH_PADDING
11253         COND_JUMP/UNCOND_JUMP
11254    where FRAGP is the first frag of either sequence.  */
11255 /* Returns bytes up to the next 1 << align_branch_power boundary, or 0.  */
11256 static int
11257 i386_branch_padding_size (fragS *fragP, offsetT address)
11258 {
11259   unsigned int offset, size, padding_size;
11260   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11261   /* A zero ADDRESS means: use the frag's own address.  */
11262   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
11263   if (!address)
11264     address = fragP->fr_address;
11265   address += fragP->fr_fix;
11266
11267   /* CMP like instruction size.  */
11268   size = fragP->tc_frag_data.cmp_size;
11269
11270   /* The base size of the branch frag.  */
11271   size += branch_fragP->fr_fix;
11272
11273   /* Add opcode and displacement bytes for the rs_machine_dependent
11274      branch frag.  */
11275   if (branch_fragP->fr_type == rs_machine_dependent)
11276     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
11277
11278   /* Check if branch is within boundary and doesn't end at the last
11279      byte.  */
11280   offset = address & ((1U << align_branch_power) - 1);
11281   if ((offset + size) >= (1U << align_branch_power))
11282     /* Padding needed to avoid crossing boundary.  */
11283     padding_size = (1U << align_branch_power) - offset;
11284   else
11285     /* No padding needed.  */
11286     padding_size = 0;
11287
11288   /* The return value may be saved in tc_frag_data.length which is
11289      unsigned byte.  */
11290   if (!fits_in_unsigned_byte (padding_size))
11291     abort ();
11292
11293   return padding_size;
11294 }
11295
11296 /* i386_generic_table_relax_frag()
11297
11298    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
11299    grow/shrink padding to align branch frags, returning the change in
11300    their padding length.  Hand others to relax_frag().  */
11301
11302 long
11303 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
11304 {
11305   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11306       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11307     {
11308       long padding_size = i386_branch_padding_size (fragP, 0);
11309       long grow = padding_size - fragP->tc_frag_data.length;
11310
11311       /* When the BRANCH_PREFIX frag is used, the computed address
11312          must match the actual address and there should be no padding.  */
11313       if (fragP->tc_frag_data.padding_address
11314           && (fragP->tc_frag_data.padding_address != fragP->fr_address
11315               || padding_size))
11316         abort ();
11317
11318       /* Update the padding size.  */
11319       if (grow)
11320         fragP->tc_frag_data.length = padding_size;
11321
11322       return grow;
11323     }
11324   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11325     {
11326       fragS *padding_fragP, *next_fragP;
11327       long padding_size, left_size, last_size;
11328
11329       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11330       if (!padding_fragP)
11331         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
11332         return (fragP->tc_frag_data.length
11333                 - fragP->tc_frag_data.last_length);
11334
11335       /* Compute the relative address of the padding frag the very
11336         first time, when the BRANCH_PREFIX frag sizes are zero.  */
11337       if (!fragP->tc_frag_data.padding_address)
11338         fragP->tc_frag_data.padding_address
11339           = padding_fragP->fr_address - (fragP->fr_address - stretch);
11340
11341       /* First update the last length from the previous iteration.  */
11342       left_size = fragP->tc_frag_data.prefix_length;
11343       for (next_fragP = fragP;
11344            next_fragP != padding_fragP;
11345            next_fragP = next_fragP->fr_next)
11346         if (next_fragP->fr_type == rs_machine_dependent
11347             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11348                 == BRANCH_PREFIX))
11349           {
11350             if (left_size)
11351               {
11352                 int max = next_fragP->tc_frag_data.max_bytes;
11353                 if (max)
11354                   {
11355                     int size;
11356                     if (max > left_size)
11357                       size = left_size;
11358                     else
11359                       size = max;
11360                     left_size -= size;
11361                     next_fragP->tc_frag_data.last_length = size;
11362                   }
11363               }
11364             else
11365               next_fragP->tc_frag_data.last_length = 0;
11366           }
11367
11368       /* Check the padding size for the padding frag.  */
11369       padding_size = i386_branch_padding_size
11370         (padding_fragP, (fragP->fr_address
11371                          + fragP->tc_frag_data.padding_address));
11372
11373       last_size = fragP->tc_frag_data.prefix_length;
11374       /* Check if there is change from the last iteration.  */
11375       if (padding_size == last_size)
11376         {
11377           /* Update the expected address of the padding frag.  */
11378           padding_fragP->tc_frag_data.padding_address
11379             = (fragP->fr_address + padding_size
11380                + fragP->tc_frag_data.padding_address);
11381           return 0;
11382         }
11383
11384       if (padding_size > fragP->tc_frag_data.max_prefix_length)
11385         {
11386           /* No padding if there is no sufficient room.  Clear the
11387              expected address of the padding frag.  */
11388           padding_fragP->tc_frag_data.padding_address = 0;
11389           padding_size = 0;
11390         }
11391       else
11392         /* Store the expected address of the padding frag.  */
11393         padding_fragP->tc_frag_data.padding_address
11394           = (fragP->fr_address + padding_size
11395              + fragP->tc_frag_data.padding_address);
11396
11397       fragP->tc_frag_data.prefix_length = padding_size;
11398
11399       /* Update the length for the current iteration.  */
11400       left_size = padding_size;
11401       for (next_fragP = fragP;
11402            next_fragP != padding_fragP;
11403            next_fragP = next_fragP->fr_next)
11404         if (next_fragP->fr_type == rs_machine_dependent
11405             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11406                 == BRANCH_PREFIX))
11407           {
11408             if (left_size)
11409               {
11410                 int max = next_fragP->tc_frag_data.max_bytes;
11411                 if (max)
11412                   {
11413                     int size;
11414                     if (max > left_size)
11415                       size = left_size;
11416                     else
11417                       size = max;
11418                     left_size -= size;
11419                     next_fragP->tc_frag_data.length = size;
11420                   }
11421               }
11422             else
11423               next_fragP->tc_frag_data.length = 0;
11424           }
11425
11426       return (fragP->tc_frag_data.length
11427               - fragP->tc_frag_data.last_length);
11428     }
11429   return relax_frag (segment, fragP, stretch);
11430 }
11431
11432 /* md_estimate_size_before_relax()
11433
11434    Called just before relax() for rs_machine_dependent frags.  The x86
11435    assembler uses these frags to handle variable size jump
11436    instructions.
11437
11438    Any symbol that is now undefined will not become defined.
11439    Return the correct fr_subtype in the frag.
11440    Return the initial "guess for variable size of frag" to caller.
11441    The guess is actually the growth beyond the fixed part.  Whatever
11442    we do to grow the fixed or variable part contributes to our
11443    returned value.  */
11444
11445 int
11446 md_estimate_size_before_relax (fragS *fragP, segT segment)
11447 {
11448   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11449       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
11450       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11451     {
11452       i386_classify_machine_dependent_frag (fragP);
11453       return fragP->tc_frag_data.length;  /* Current padding length.  */
11454     }
11455
11456   /* We've already got fragP->fr_subtype right;  all we have to do is
11457      check for un-relaxable symbols.  On an ELF system, we can't relax
11458      an externally visible symbol, because it may be overridden by a
11459      shared library.  */
11460   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
11461 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11462       || (IS_ELF
11463           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
11464                                                 fragP->fr_var))
11465 #endif
11466 #if defined (OBJ_COFF) && defined (TE_PE)
11467       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
11468           && S_IS_WEAK (fragP->fr_symbol))
11469 #endif
11470       )
11471     {
11472       /* Symbol is undefined in this segment, or we need to keep a
11473          reloc so that weak symbols can be overridden.  */
11474       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
11475       enum bfd_reloc_code_real reloc_type;
11476       unsigned char *opcode;
11477       int old_fr_fix;
11478
11479       if (fragP->fr_var != NO_RELOC)
11480         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
11481       else if (size == 2)
11482         reloc_type = BFD_RELOC_16_PCREL;
11483 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11484       else if (need_plt32_p (fragP->fr_symbol))
11485         reloc_type = BFD_RELOC_X86_64_PLT32;
11486 #endif
11487       else
11488         reloc_type = BFD_RELOC_32_PCREL;
11489
11490       old_fr_fix = fragP->fr_fix;
11491       opcode = (unsigned char *) fragP->fr_opcode;
11492
11493       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
11494         {
11495         case UNCOND_JUMP:
11496           /* Make jmp (0xeb) a (d)word displacement jump.  */
11497           opcode[0] = 0xe9;
11498           fragP->fr_fix += size;
11499           fix_new (fragP, old_fr_fix, size,
11500                    fragP->fr_symbol,
11501                    fragP->fr_offset, 1,
11502                    reloc_type);
11503           break;
11504
11505         case COND_JUMP86:
11506           if (size == 2
11507               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
11508             {
11509               /* Negate the condition, and branch past an
11510                  unconditional jump.  */
11511               opcode[0] ^= 1;
11512               opcode[1] = 3;
11513               /* Insert an unconditional jump.  */
11514               opcode[2] = 0xe9;
11515               /* We added two extra opcode bytes, and have a two byte
11516                  offset.  */
11517               fragP->fr_fix += 2 + 2;
11518               fix_new (fragP, old_fr_fix + 2, 2,
11519                        fragP->fr_symbol,
11520                        fragP->fr_offset, 1,
11521                        reloc_type);
11522               break;
11523             }
11524           /* Fall through.  */
11525
11526         case COND_JUMP:
11527           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
11528             {
11529               fixS *fixP;
11530               /* Keep the short form: one displacement byte, 8-bit reloc.  */
11531               fragP->fr_fix += 1;
11532               fixP = fix_new (fragP, old_fr_fix, 1,
11533                               fragP->fr_symbol,
11534                               fragP->fr_offset, 1,
11535                               BFD_RELOC_8_PCREL);
11536               fixP->fx_signed = 1;
11537               break;
11538             }
11539
11540           /* This changes the byte-displacement jump 0x7N
11541              to the (d)word-displacement jump 0x0f,0x8N.  */
11542           opcode[1] = opcode[0] + 0x10;
11543           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11544           /* We've added an opcode byte plus a SIZE-byte displacement.  */
11545           fragP->fr_fix += 1 + size;
11546           fix_new (fragP, old_fr_fix + 1, size,
11547                    fragP->fr_symbol,
11548                    fragP->fr_offset, 1,
11549                    reloc_type);
11550           break;
11551
11552         default:
11553           BAD_CASE (fragP->fr_subtype);
11554           break;
11555         }
11556       frag_wane (fragP);
11557       return fragP->fr_fix - old_fr_fix;  /* Bytes added to the fixed part.  */
11558     }
11559
11560   /* Guess size depending on current relax state.  Initially the relax
11561      state will correspond to a short jump and we return 1, because
11562      the variable part of the frag (the branch offset) is one byte
11563      long.  However, we can relax a section more than once and in that
11564      case we must either set fr_subtype back to the unrelaxed state,
11565      or return the value for the appropriate branch.  */
11566   return md_relax_table[fragP->fr_subtype].rlx_length;
11567 }
11568
11569 /* Called after relax() is finished.
11570
11571    In:  Address of frag.
11572         fr_type == rs_machine_dependent.
11573         fr_subtype is what the address relaxed to.
11574
11575    Out: Any fixSs and constants are set up.
11576         Caller will turn frag into a ".space 0".  */
11577 /* For branch-alignment frags this instead emits the nop/prefix padding.  */
11578 void
11579 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
11580                  fragS *fragP)
11581 {
11582   unsigned char *opcode;
11583   unsigned char *where_to_put_displacement = NULL;
11584   offsetT target_address;
11585   offsetT opcode_address;
11586   unsigned int extension = 0;
11587   offsetT displacement_from_opcode_start;
11588
11589   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11590       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
11591       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11592     {
11593       /* Generate nop padding.  */
11594       unsigned int size = fragP->tc_frag_data.length;
11595       if (size)
11596         {
11597           if (size > fragP->tc_frag_data.max_bytes)
11598             abort ();
11599
11600           if (flag_debug)
11601             {
11602               const char *msg;
11603               const char *branch = "branch";
11604               const char *prefix = "";
11605               fragS *padding_fragP;
11606               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11607                   == BRANCH_PREFIX)
11608                 {
11609                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11610                   switch (fragP->tc_frag_data.default_prefix)
11611                     {
11612                     default:
11613                       abort ();
11614                       break;
11615                     case CS_PREFIX_OPCODE:
11616                       prefix = " cs";
11617                       break;
11618                     case DS_PREFIX_OPCODE:
11619                       prefix = " ds";
11620                       break;
11621                     case ES_PREFIX_OPCODE:
11622                       prefix = " es";
11623                       break;
11624                     case FS_PREFIX_OPCODE:
11625                       prefix = " fs";
11626                       break;
11627                     case GS_PREFIX_OPCODE:
11628                       prefix = " gs";
11629                       break;
11630                     case SS_PREFIX_OPCODE:
11631                       prefix = " ss";
11632                       break;
11633                     }
11634                   if (padding_fragP)
11635                     msg = _("%s:%u: add %d%s at 0x%llx to align "
11636                             "%s within %d-byte boundary\n");
11637                   else
11638                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
11639                             "align %s within %d-byte boundary\n");
11640                 }
11641               else
11642                 {
11643                   padding_fragP = fragP;
11644                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
11645                           "%s within %d-byte boundary\n");
11646                 }
11647               /* Name the kind of branch being aligned, if it is known.  */
11648               if (padding_fragP)
11649                 switch (padding_fragP->tc_frag_data.branch_type)
11650                   {
11651                   case align_branch_jcc:
11652                     branch = "jcc";
11653                     break;
11654                   case align_branch_fused:
11655                     branch = "fused jcc";
11656                     break;
11657                   case align_branch_jmp:
11658                     branch = "jmp";
11659                     break;
11660                   case align_branch_call:
11661                     branch = "call";
11662                     break;
11663                   case align_branch_indirect:
11664                     branch = "indirect branch";
11665                     break;
11666                   case align_branch_ret:
11667                     branch = "ret";
11668                     break;
11669                   default:
11670                     break;
11671                   }
11672
11673               fprintf (stdout, msg,
11674                        fragP->fr_file, fragP->fr_line, size, prefix,
11675                        (long long) fragP->fr_address, branch,
11676                        1 << align_branch_power);
11677             }
11678           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11679             memset (fragP->fr_opcode,
11680                     fragP->tc_frag_data.default_prefix, size);
11681           else
11682             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
11683                                 size, 0);
11684           fragP->fr_fix += size;
11685         }
11686       return;
11687     }
11688
11689   opcode = (unsigned char *) fragP->fr_opcode;
11690
11691   /* Address we want to reach in file space.  */
11692   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
11693
11694   /* Address opcode resides at in file space.  */
11695   opcode_address = fragP->fr_address + fragP->fr_fix;
11696
11697   /* Displacement from opcode start to fill into instruction.  */
11698   displacement_from_opcode_start = target_address - opcode_address;
11699
11700   if ((fragP->fr_subtype & BIG) == 0)
11701     {
11702       /* Don't have to change opcode.  */
11703       extension = 1;            /* 1 opcode + 1 displacement  */
11704       where_to_put_displacement = &opcode[1];
11705     }
11706   else
11707     {
11708       if (no_cond_jump_promotion
11709           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
11710         as_warn_where (fragP->fr_file, fragP->fr_line,
11711                        _("long jump required"));
11712
11713       switch (fragP->fr_subtype)
11714         {
11715         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
11716           extension = 4;                /* 1 opcode + 4 displacement  */
11717           opcode[0] = 0xe9;
11718           where_to_put_displacement = &opcode[1];
11719           break;
11720
11721         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
11722           extension = 2;                /* 1 opcode + 2 displacement  */
11723           opcode[0] = 0xe9;
11724           where_to_put_displacement = &opcode[1];
11725           break;
11726
11727         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
11728         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
11729           extension = 5;                /* 2 opcode + 4 displacement  */
11730           opcode[1] = opcode[0] + 0x10;
11731           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11732           where_to_put_displacement = &opcode[2];
11733           break;
11734
11735         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
11736           extension = 3;                /* 2 opcode + 2 displacement  */
11737           opcode[1] = opcode[0] + 0x10;
11738           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11739           where_to_put_displacement = &opcode[2];
11740           break;
11741
11742         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
11743           extension = 4;                /* 2 extra opcode + 2 displacement  */
11744           opcode[0] ^= 1;
11745           opcode[1] = 3;
11746           opcode[2] = 0xe9;
11747           where_to_put_displacement = &opcode[3];
11748           break;
11749
11750         default:
11751           BAD_CASE (fragP->fr_subtype);
11752           break;
11753         }
11754     }
11755
11756   /* If size is less than four we are sure that the operand fits,
11757      but if it's 4, then it could be that the displacement is larger
11758      than -/+ 2GB.  */
11759   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
11760       && object_64bit
11761       && ((addressT) (displacement_from_opcode_start - extension
11762                       + ((addressT) 1 << 31))
11763           > (((addressT) 2 << 31) - 1)))
11764     {
11765       as_bad_where (fragP->fr_file, fragP->fr_line,
11766                     _("jump target out of range"));
11767       /* Make us emit 0.  */
11768       displacement_from_opcode_start = extension;
11769     }
11770   /* Now put displacement after opcode.  */
11771   md_number_to_chars ((char *) where_to_put_displacement,
11772                       (valueT) (displacement_from_opcode_start - extension),
11773                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
11774   fragP->fr_fix += extension;
11775 }
11776 \f
11777 /* Apply a fixup (fixP) to segment data, once it has been determined
11778    by our caller that we have all the info we need to fix it up.
11779
11780    Parameter valP is the pointer to the value of the bits.
11781
11782    On the 386, immediates, displacements, and data pointers are all in
11783    the same (little-endian) format, so we don't need to care about which
11784    we are handling.  */
11785
11786 void
11787 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
11788 {
11789   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
11790   valueT value = *valP;
11791
11792 #if !defined (TE_Mach)
11793   if (fixP->fx_pcrel)
11794     {
11795       switch (fixP->fx_r_type)
11796         {
11797         default:
11798           break;
11799
11800         case BFD_RELOC_64:
11801           fixP->fx_r_type = BFD_RELOC_64_PCREL;
11802           break;
11803         case BFD_RELOC_32:
11804         case BFD_RELOC_X86_64_32S:
11805           fixP->fx_r_type = BFD_RELOC_32_PCREL;
11806           break;
11807         case BFD_RELOC_16:
11808           fixP->fx_r_type = BFD_RELOC_16_PCREL;
11809           break;
11810         case BFD_RELOC_8:
11811           fixP->fx_r_type = BFD_RELOC_8_PCREL;
11812           break;
11813         }
11814     }
11815
11816   if (fixP->fx_addsy != NULL
11817       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
11818           || fixP->fx_r_type == BFD_RELOC_64_PCREL
11819           || fixP->fx_r_type == BFD_RELOC_16_PCREL
11820           || fixP->fx_r_type == BFD_RELOC_8_PCREL)
11821       && !use_rela_relocations)
11822     {
11823       /* This is a hack.  There should be a better way to handle this.
11824          This covers for the fact that bfd_install_relocation will
11825          subtract the current location (for partial_inplace, PC relative
11826          relocations); see more below.  */
11827 #ifndef OBJ_AOUT
11828       if (IS_ELF
11829 #ifdef TE_PE
11830           || OUTPUT_FLAVOR == bfd_target_coff_flavour
11831 #endif
11832           )
11833         value += fixP->fx_where + fixP->fx_frag->fr_address;
11834 #endif
11835 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11836       if (IS_ELF)
11837         {
11838           segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
11839
11840           if ((sym_seg == seg
11841                || (symbol_section_p (fixP->fx_addsy)
11842                    && sym_seg != absolute_section))
11843               && !generic_force_reloc (fixP))
11844             {
11845               /* Yes, we add the values in twice.  This is because
11846                  bfd_install_relocation subtracts them out again.  I think
11847                  bfd_install_relocation is broken, but I don't dare change
11848                  it.  FIXME.  */
11849               value += fixP->fx_where + fixP->fx_frag->fr_address;
11850             }
11851         }
11852 #endif
11853 #if defined (OBJ_COFF) && defined (TE_PE)
11854       /* For some reason, the PE format does not store a
11855          section address offset for a PC relative symbol.  */
11856       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
11857           || S_IS_WEAK (fixP->fx_addsy))
11858         value += md_pcrel_from (fixP);
11859 #endif
11860     }
11861 #if defined (OBJ_COFF) && defined (TE_PE)
11862   if (fixP->fx_addsy != NULL
11863       && S_IS_WEAK (fixP->fx_addsy)
11864       /* PR 16858: Do not modify weak function references.  */
11865       && ! fixP->fx_pcrel)
11866     {
11867 #if !defined (TE_PEP)
11868       /* For x86 PE weak function symbols are neither PC-relative
11869          nor do they set S_IS_FUNCTION.  So the only reliable way
11870          to detect them is to check the flags of their containing
11871          section.  */
11872       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
11873           && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
11874         ;  /* Leave the value untouched for symbols in code sections.  */
11875       else
11876 #endif
11877       value -= S_GET_VALUE (fixP->fx_addsy);
11878     }
11879 #endif
11880
11881   /* Fix a few things - the dynamic linker expects certain values here,
11882      and we must not disappoint it.  */
11883 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11884   if (IS_ELF && fixP->fx_addsy)
11885     switch (fixP->fx_r_type)
11886       {
11887       case BFD_RELOC_386_PLT32:
11888       case BFD_RELOC_X86_64_PLT32:
11889         /* Make the jump instruction point to the address of the operand.
11890            At runtime we merely add the offset to the actual PLT entry.
11891            NB: Subtract the offset size only for jump instructions.  */
11892         if (fixP->fx_pcrel)
11893           value = -4;
11894         break;
11895
11896       case BFD_RELOC_386_TLS_GD:
11897       case BFD_RELOC_386_TLS_LDM:
11898       case BFD_RELOC_386_TLS_IE_32:
11899       case BFD_RELOC_386_TLS_IE:
11900       case BFD_RELOC_386_TLS_GOTIE:
11901       case BFD_RELOC_386_TLS_GOTDESC:
11902       case BFD_RELOC_X86_64_TLSGD:
11903       case BFD_RELOC_X86_64_TLSLD:
11904       case BFD_RELOC_X86_64_GOTTPOFF:
11905       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
11906         value = 0; /* Fully resolved at runtime.  No addend.  */
11907         /* Fallthrough */
11908       case BFD_RELOC_386_TLS_LE:
11909       case BFD_RELOC_386_TLS_LDO_32:
11910       case BFD_RELOC_386_TLS_LE_32:
11911       case BFD_RELOC_X86_64_DTPOFF32:
11912       case BFD_RELOC_X86_64_DTPOFF64:
11913       case BFD_RELOC_X86_64_TPOFF32:
11914       case BFD_RELOC_X86_64_TPOFF64:
11915         S_SET_THREAD_LOCAL (fixP->fx_addsy);
11916         break;
11917
11918       case BFD_RELOC_386_TLS_DESC_CALL:
11919       case BFD_RELOC_X86_64_TLSDESC_CALL:
11920         value = 0; /* Fully resolved at runtime.  No addend.  */
11921         S_SET_THREAD_LOCAL (fixP->fx_addsy);
11922         fixP->fx_done = 0;
11923         return;
11924
11925       case BFD_RELOC_VTABLE_INHERIT:
11926       case BFD_RELOC_VTABLE_ENTRY:
11927         fixP->fx_done = 0;
11928         return;
11929
11930       default:
11931         break;
11932       }
11933 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
11934   *valP = value;
11935 #endif /* !defined (TE_Mach)  */
11936
11937   /* Are we finished with this relocation now?  */
11938   if (fixP->fx_addsy == NULL)
11939     fixP->fx_done = 1;
11940 #if defined (OBJ_COFF) && defined (TE_PE)
11941   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
11942     {
11943       fixP->fx_done = 0;
11944       /* Remember value for tc_gen_reloc.  */
11945       fixP->fx_addnumber = value;
11946       /* Clear out the frag for now.  */
11947       value = 0;
11948     }
11949 #endif
11950   else if (use_rela_relocations)
11951     {
11952       fixP->fx_no_overflow = 1;
11953       /* Remember value for tc_gen_reloc.  */
11954       fixP->fx_addnumber = value;
11955       value = 0;
11956     }
11957   /* Store the value (zero if it was saved in fx_addnumber) in the frag.  */
11958   md_number_to_chars (p, value, fixP->fx_size);
11959 }
11960 \f
11961 const char *
11962 md_atof (int type, char *litP, int *sizeP)
11963 {
11964   /* Delegate to ieee_md_atof.  The LITTLENUMs are output in REVERSE order,
11965      in accord with the little-endian 386.  */
11966   return ieee_md_atof (type, litP, sizeP, FALSE);
11967 }
11968 \f
11969 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
11970 /* Format C as 'c' when printable, else as (0xNN), in the static buffer.  */
11971 static char *
11972 output_invalid (int c)
11973 {
11974   if (ISPRINT (c))
11975     snprintf (output_invalid_buf, sizeof (output_invalid_buf),
11976               "'%c'", c);
11977   else
11978     snprintf (output_invalid_buf, sizeof (output_invalid_buf),
11979               "(0x%x)", (unsigned char) c);
11980   return output_invalid_buf;  /* Overwritten by the next call.  */
11981 }
11982
11983 /* REG_STRING starts *before* REGISTER_PREFIX.  */
11984
11985 static const reg_entry *
11986 parse_real_register (char *reg_string, char **end_op)
11987 {
11988   char *s = reg_string;
11989   char *p;
11990   char reg_name_given[MAX_REG_NAME_SIZE + 1];
11991   const reg_entry *r;
11992
11993   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
11994   if (*s == REGISTER_PREFIX)
11995     ++s;
11996
11997   if (is_space_char (*s))
11998     ++s;
11999
12000   p = reg_name_given;
12001   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12002     {
12003       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12004         return (const reg_entry *) NULL;
12005       s++;
12006     }
12007
12008   /* For naked regs, make sure that we are not dealing with an identifier.
12009      This prevents confusing an identifier like `eax_var' with register
12010      `eax'.  */
12011   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12012     return (const reg_entry *) NULL;
12013
12014   *end_op = s;
12015
12016   r = (const reg_entry *) hash_find (reg_hash, reg_name_given);
12017
12018   /* Handle floating point regs, allowing spaces in the (i) part.  */
12019   if (r == i386_regtab /* %st is first entry of table  */)
12020     {
12021       if (!cpu_arch_flags.bitfield.cpu8087
12022           && !cpu_arch_flags.bitfield.cpu287
12023           && !cpu_arch_flags.bitfield.cpu387)
12024         return (const reg_entry *) NULL;
12025
12026       if (is_space_char (*s))
12027         ++s;
12028       if (*s == '(')
12029         {
12030           ++s;
12031           if (is_space_char (*s))
12032             ++s;
12033           if (*s >= '0' && *s <= '7')
12034             {
12035               int fpr = *s - '0';
12036               ++s;
12037               if (is_space_char (*s))
12038                 ++s;
12039               if (*s == ')')
12040                 {
12041                   *end_op = s + 1;
12042                   r = (const reg_entry *) hash_find (reg_hash, "st(0)");
12043                   know (r);
12044                   return r + fpr;
12045                 }
12046             }
12047           /* We have "%st(" then garbage.  */
12048           return (const reg_entry *) NULL;
12049         }
12050     }
12051
12052   if (r == NULL || allow_pseudo_reg)
12053     return r;
12054
12055   if (operand_type_all_zero (&r->reg_type))
12056     return (const reg_entry *) NULL;
12057
12058   if ((r->reg_type.bitfield.dword
12059        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12060        || r->reg_type.bitfield.class == RegCR
12061        || r->reg_type.bitfield.class == RegDR
12062        || r->reg_type.bitfield.class == RegTR)
12063       && !cpu_arch_flags.bitfield.cpui386)
12064     return (const reg_entry *) NULL;
12065
12066   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12067     return (const reg_entry *) NULL;
12068
12069   if (!cpu_arch_flags.bitfield.cpuavx512f)
12070     {
12071       if (r->reg_type.bitfield.zmmword
12072           || r->reg_type.bitfield.class == RegMask)
12073         return (const reg_entry *) NULL;
12074
12075       if (!cpu_arch_flags.bitfield.cpuavx)
12076         {
12077           if (r->reg_type.bitfield.ymmword)
12078             return (const reg_entry *) NULL;
12079
12080           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12081             return (const reg_entry *) NULL;
12082         }
12083     }
12084
12085   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12086     return (const reg_entry *) NULL;
12087
12088   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12089   if (!allow_index_reg && r->reg_num == RegIZ)
12090     return (const reg_entry *) NULL;
12091
12092   /* Upper 16 vector registers are only available with VREX in 64bit
12093      mode, and require EVEX encoding.  */
12094   if (r->reg_flags & RegVRex)
12095     {
12096       if (!cpu_arch_flags.bitfield.cpuavx512f
12097           || flag_code != CODE_64BIT)
12098         return (const reg_entry *) NULL;
12099
12100       i.vec_encoding = vex_encoding_evex;
12101     }
12102
12103   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12104       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12105       && flag_code != CODE_64BIT)
12106     return (const reg_entry *) NULL;
12107
12108   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12109       && !intel_syntax)
12110     return (const reg_entry *) NULL;
12111
12112   return r;
12113 }
12114
12115 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12116
12117 static const reg_entry *
12118 parse_register (char *reg_string, char **end_op)
12119 {
12120   const reg_entry *r;
12121
12122   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12123     r = parse_real_register (reg_string, end_op);
12124   else
12125     r = NULL;
12126   if (!r)
12127     {
12128       char *save = input_line_pointer;
12129       char c;
12130       symbolS *symbolP;
12131
12132       input_line_pointer = reg_string;
12133       c = get_symbol_name (&reg_string);
12134       symbolP = symbol_find (reg_string);
12135       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
12136         {
12137           const expressionS *e = symbol_get_value_expression (symbolP);
12138
12139           know (e->X_op == O_register);
12140           know (e->X_add_number >= 0
12141                 && (valueT) e->X_add_number < i386_regtab_size);
12142           r = i386_regtab + e->X_add_number;
12143           if ((r->reg_flags & RegVRex))
12144             i.vec_encoding = vex_encoding_evex;
12145           *end_op = input_line_pointer;
12146         }
12147       *input_line_pointer = c;
12148       input_line_pointer = save;
12149     }
12150   return r;
12151 }
12152
12153 int
12154 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12155 {
12156   const reg_entry *r;
12157   char *end = input_line_pointer;
12158
12159   *end = *nextcharP;
12160   r = parse_register (name, &input_line_pointer);
12161   if (r && end <= input_line_pointer)
12162     {
12163       *nextcharP = *input_line_pointer;
12164       *input_line_pointer = 0;
12165       e->X_op = O_register;
12166       e->X_add_number = r - i386_regtab;
12167       return 1;
12168     }
12169   input_line_pointer = end;
12170   *end = 0;
12171   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12172 }
12173
12174 void
12175 md_operand (expressionS *e)
12176 {
12177   char *end;
12178   const reg_entry *r;
12179
12180   switch (*input_line_pointer)
12181     {
12182     case REGISTER_PREFIX:
12183       r = parse_real_register (input_line_pointer, &end);
12184       if (r)
12185         {
12186           e->X_op = O_register;
12187           e->X_add_number = r - i386_regtab;
12188           input_line_pointer = end;
12189         }
12190       break;
12191
12192     case '[':
12193       gas_assert (intel_syntax);
12194       end = input_line_pointer++;
12195       expression (e);
12196       if (*input_line_pointer == ']')
12197         {
12198           ++input_line_pointer;
12199           e->X_op_symbol = make_expr_symbol (e);
12200           e->X_add_symbol = NULL;
12201           e->X_add_number = 0;
12202           e->X_op = O_index;
12203         }
12204       else
12205         {
12206           e->X_op = O_absent;
12207           input_line_pointer = end;
12208         }
12209       break;
12210     }
12211 }
12212
12213 \f
/* Short (single-letter) options handled by md_parse_option, in getopt
   notation: a trailing `:' marks a required argument and `::' an
   optional one.  ELF-capable builds additionally accept -k, -V, -Q ARG
   and -s.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
12219
/* Codes for the long options listed in md_longopts, numbered
   consecutively from OPTION_MD_BASE.  md_parse_option switches on
   these values.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)

12251
/* Target-specific long options.  Each entry gives the option name,
   whether it takes an argument, and the OPTION_* code handed to
   md_parse_option.  Entries inside preprocessor conditionals exist
   only for the matching object formats / target environments.  The
   table ends with an all-NULL sentinel entry.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
/* Size of the table above in bytes (not the number of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
12295
12296 int
12297 md_parse_option (int c, const char *arg)
12298 {
12299   unsigned int j;
12300   char *arch, *next, *saved, *type;
12301
12302   switch (c)
12303     {
12304     case 'n':
12305       optimize_align_code = 0;
12306       break;
12307
12308     case 'q':
12309       quiet_warnings = 1;
12310       break;
12311
12312 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12313       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
12314          should be emitted or not.  FIXME: Not implemented.  */
12315     case 'Q':
12316       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
12317         return 0;
12318       break;
12319
12320       /* -V: SVR4 argument to print version ID.  */
12321     case 'V':
12322       print_version_id ();
12323       break;
12324
12325       /* -k: Ignore for FreeBSD compatibility.  */
12326     case 'k':
12327       break;
12328
12329     case 's':
12330       /* -s: On i386 Solaris, this tells the native assembler to use
12331          .stab instead of .stab.excl.  We always use .stab anyhow.  */
12332       break;
12333
12334     case OPTION_MSHARED:
12335       shared = 1;
12336       break;
12337
12338     case OPTION_X86_USED_NOTE:
12339       if (strcasecmp (arg, "yes") == 0)
12340         x86_used_note = 1;
12341       else if (strcasecmp (arg, "no") == 0)
12342         x86_used_note = 0;
12343       else
12344         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
12345       break;
12346
12347
12348 #endif
12349 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12350      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12351     case OPTION_64:
12352       {
12353         const char **list, **l;
12354
12355         list = bfd_target_list ();
12356         for (l = list; *l != NULL; l++)
12357           if (CONST_STRNEQ (*l, "elf64-x86-64")
12358               || strcmp (*l, "coff-x86-64") == 0
12359               || strcmp (*l, "pe-x86-64") == 0
12360               || strcmp (*l, "pei-x86-64") == 0
12361               || strcmp (*l, "mach-o-x86-64") == 0)
12362             {
12363               default_arch = "x86_64";
12364               break;
12365             }
12366         if (*l == NULL)
12367           as_fatal (_("no compiled in support for x86_64"));
12368         free (list);
12369       }
12370       break;
12371 #endif
12372
12373 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12374     case OPTION_X32:
12375       if (IS_ELF)
12376         {
12377           const char **list, **l;
12378
12379           list = bfd_target_list ();
12380           for (l = list; *l != NULL; l++)
12381             if (CONST_STRNEQ (*l, "elf32-x86-64"))
12382               {
12383                 default_arch = "x86_64:32";
12384                 break;
12385               }
12386           if (*l == NULL)
12387             as_fatal (_("no compiled in support for 32bit x86_64"));
12388           free (list);
12389         }
12390       else
12391         as_fatal (_("32bit x86_64 is only supported for ELF"));
12392       break;
12393 #endif
12394
12395     case OPTION_32:
12396       default_arch = "i386";
12397       break;
12398
12399     case OPTION_DIVIDE:
12400 #ifdef SVR4_COMMENT_CHARS
12401       {
12402         char *n, *t;
12403         const char *s;
12404
12405         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
12406         t = n;
12407         for (s = i386_comment_chars; *s != '\0'; s++)
12408           if (*s != '/')
12409             *t++ = *s;
12410         *t = '\0';
12411         i386_comment_chars = n;
12412       }
12413 #endif
12414       break;
12415
12416     case OPTION_MARCH:
12417       saved = xstrdup (arg);
12418       arch = saved;
12419       /* Allow -march=+nosse.  */
12420       if (*arch == '+')
12421         arch++;
12422       do
12423         {
12424           if (*arch == '.')
12425             as_fatal (_("invalid -march= option: `%s'"), arg);
12426           next = strchr (arch, '+');
12427           if (next)
12428             *next++ = '\0';
12429           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12430             {
12431               if (strcmp (arch, cpu_arch [j].name) == 0)
12432                 {
12433                   /* Processor.  */
12434                   if (! cpu_arch[j].flags.bitfield.cpui386)
12435                     continue;
12436
12437                   cpu_arch_name = cpu_arch[j].name;
12438                   cpu_sub_arch_name = NULL;
12439                   cpu_arch_flags = cpu_arch[j].flags;
12440                   cpu_arch_isa = cpu_arch[j].type;
12441                   cpu_arch_isa_flags = cpu_arch[j].flags;
12442                   if (!cpu_arch_tune_set)
12443                     {
12444                       cpu_arch_tune = cpu_arch_isa;
12445                       cpu_arch_tune_flags = cpu_arch_isa_flags;
12446                     }
12447                   break;
12448                 }
12449               else if (*cpu_arch [j].name == '.'
12450                        && strcmp (arch, cpu_arch [j].name + 1) == 0)
12451                 {
12452                   /* ISA extension.  */
12453                   i386_cpu_flags flags;
12454
12455                   flags = cpu_flags_or (cpu_arch_flags,
12456                                         cpu_arch[j].flags);
12457
12458                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12459                     {
12460                       if (cpu_sub_arch_name)
12461                         {
12462                           char *name = cpu_sub_arch_name;
12463                           cpu_sub_arch_name = concat (name,
12464                                                       cpu_arch[j].name,
12465                                                       (const char *) NULL);
12466                           free (name);
12467                         }
12468                       else
12469                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
12470                       cpu_arch_flags = flags;
12471                       cpu_arch_isa_flags = flags;
12472                     }
12473                   else
12474                     cpu_arch_isa_flags
12475                       = cpu_flags_or (cpu_arch_isa_flags,
12476                                       cpu_arch[j].flags);
12477                   break;
12478                 }
12479             }
12480
12481           if (j >= ARRAY_SIZE (cpu_arch))
12482             {
12483               /* Disable an ISA extension.  */
12484               for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12485                 if (strcmp (arch, cpu_noarch [j].name) == 0)
12486                   {
12487                     i386_cpu_flags flags;
12488
12489                     flags = cpu_flags_and_not (cpu_arch_flags,
12490                                                cpu_noarch[j].flags);
12491                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12492                       {
12493                         if (cpu_sub_arch_name)
12494                           {
12495                             char *name = cpu_sub_arch_name;
12496                             cpu_sub_arch_name = concat (arch,
12497                                                         (const char *) NULL);
12498                             free (name);
12499                           }
12500                         else
12501                           cpu_sub_arch_name = xstrdup (arch);
12502                         cpu_arch_flags = flags;
12503                         cpu_arch_isa_flags = flags;
12504                       }
12505                     break;
12506                   }
12507
12508               if (j >= ARRAY_SIZE (cpu_noarch))
12509                 j = ARRAY_SIZE (cpu_arch);
12510             }
12511
12512           if (j >= ARRAY_SIZE (cpu_arch))
12513             as_fatal (_("invalid -march= option: `%s'"), arg);
12514
12515           arch = next;
12516         }
12517       while (next != NULL);
12518       free (saved);
12519       break;
12520
12521     case OPTION_MTUNE:
12522       if (*arg == '.')
12523         as_fatal (_("invalid -mtune= option: `%s'"), arg);
12524       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12525         {
12526           if (strcmp (arg, cpu_arch [j].name) == 0)
12527             {
12528               cpu_arch_tune_set = 1;
12529               cpu_arch_tune = cpu_arch [j].type;
12530               cpu_arch_tune_flags = cpu_arch[j].flags;
12531               break;
12532             }
12533         }
12534       if (j >= ARRAY_SIZE (cpu_arch))
12535         as_fatal (_("invalid -mtune= option: `%s'"), arg);
12536       break;
12537
12538     case OPTION_MMNEMONIC:
12539       if (strcasecmp (arg, "att") == 0)
12540         intel_mnemonic = 0;
12541       else if (strcasecmp (arg, "intel") == 0)
12542         intel_mnemonic = 1;
12543       else
12544         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
12545       break;
12546
12547     case OPTION_MSYNTAX:
12548       if (strcasecmp (arg, "att") == 0)
12549         intel_syntax = 0;
12550       else if (strcasecmp (arg, "intel") == 0)
12551         intel_syntax = 1;
12552       else
12553         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
12554       break;
12555
12556     case OPTION_MINDEX_REG:
12557       allow_index_reg = 1;
12558       break;
12559
12560     case OPTION_MNAKED_REG:
12561       allow_naked_reg = 1;
12562       break;
12563
12564     case OPTION_MSSE2AVX:
12565       sse2avx = 1;
12566       break;
12567
12568     case OPTION_MSSE_CHECK:
12569       if (strcasecmp (arg, "error") == 0)
12570         sse_check = check_error;
12571       else if (strcasecmp (arg, "warning") == 0)
12572         sse_check = check_warning;
12573       else if (strcasecmp (arg, "none") == 0)
12574         sse_check = check_none;
12575       else
12576         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
12577       break;
12578
12579     case OPTION_MOPERAND_CHECK:
12580       if (strcasecmp (arg, "error") == 0)
12581         operand_check = check_error;
12582       else if (strcasecmp (arg, "warning") == 0)
12583         operand_check = check_warning;
12584       else if (strcasecmp (arg, "none") == 0)
12585         operand_check = check_none;
12586       else
12587         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
12588       break;
12589
12590     case OPTION_MAVXSCALAR:
12591       if (strcasecmp (arg, "128") == 0)
12592         avxscalar = vex128;
12593       else if (strcasecmp (arg, "256") == 0)
12594         avxscalar = vex256;
12595       else
12596         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
12597       break;
12598
12599     case OPTION_MVEXWIG:
12600       if (strcmp (arg, "0") == 0)
12601         vexwig = vexw0;
12602       else if (strcmp (arg, "1") == 0)
12603         vexwig = vexw1;
12604       else
12605         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
12606       break;
12607
12608     case OPTION_MADD_BND_PREFIX:
12609       add_bnd_prefix = 1;
12610       break;
12611
12612     case OPTION_MEVEXLIG:
12613       if (strcmp (arg, "128") == 0)
12614         evexlig = evexl128;
12615       else if (strcmp (arg, "256") == 0)
12616         evexlig = evexl256;
12617       else  if (strcmp (arg, "512") == 0)
12618         evexlig = evexl512;
12619       else
12620         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
12621       break;
12622
12623     case OPTION_MEVEXRCIG:
12624       if (strcmp (arg, "rne") == 0)
12625         evexrcig = rne;
12626       else if (strcmp (arg, "rd") == 0)
12627         evexrcig = rd;
12628       else if (strcmp (arg, "ru") == 0)
12629         evexrcig = ru;
12630       else if (strcmp (arg, "rz") == 0)
12631         evexrcig = rz;
12632       else
12633         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
12634       break;
12635
12636     case OPTION_MEVEXWIG:
12637       if (strcmp (arg, "0") == 0)
12638         evexwig = evexw0;
12639       else if (strcmp (arg, "1") == 0)
12640         evexwig = evexw1;
12641       else
12642         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
12643       break;
12644
12645 # if defined (TE_PE) || defined (TE_PEP)
12646     case OPTION_MBIG_OBJ:
12647       use_big_obj = 1;
12648       break;
12649 #endif
12650
12651     case OPTION_MOMIT_LOCK_PREFIX:
12652       if (strcasecmp (arg, "yes") == 0)
12653         omit_lock_prefix = 1;
12654       else if (strcasecmp (arg, "no") == 0)
12655         omit_lock_prefix = 0;
12656       else
12657         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
12658       break;
12659
12660     case OPTION_MFENCE_AS_LOCK_ADD:
12661       if (strcasecmp (arg, "yes") == 0)
12662         avoid_fence = 1;
12663       else if (strcasecmp (arg, "no") == 0)
12664         avoid_fence = 0;
12665       else
12666         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
12667       break;
12668
12669     case OPTION_MRELAX_RELOCATIONS:
12670       if (strcasecmp (arg, "yes") == 0)
12671         generate_relax_relocations = 1;
12672       else if (strcasecmp (arg, "no") == 0)
12673         generate_relax_relocations = 0;
12674       else
12675         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
12676       break;
12677
12678     case OPTION_MALIGN_BRANCH_BOUNDARY:
12679       {
12680         char *end;
12681         long int align = strtoul (arg, &end, 0);
12682         if (*end == '\0')
12683           {
12684             if (align == 0)
12685               {
12686                 align_branch_power = 0;
12687                 break;
12688               }
12689             else if (align >= 16)
12690               {
12691                 int align_power;
12692                 for (align_power = 0;
12693                      (align & 1) == 0;
12694                      align >>= 1, align_power++)
12695                   continue;
12696                 /* Limit alignment power to 31.  */
12697                 if (align == 1 && align_power < 32)
12698                   {
12699                     align_branch_power = align_power;
12700                     break;
12701                   }
12702               }
12703           }
12704         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
12705       }
12706       break;
12707
12708     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
12709       {
12710         char *end;
12711         int align = strtoul (arg, &end, 0);
12712         /* Some processors only support 5 prefixes.  */
12713         if (*end == '\0' && align >= 0 && align < 6)
12714           {
12715             align_branch_prefix_size = align;
12716             break;
12717           }
12718         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
12719                   arg);
12720       }
12721       break;
12722
12723     case OPTION_MALIGN_BRANCH:
12724       align_branch = 0;
12725       saved = xstrdup (arg);
12726       type = saved;
12727       do
12728         {
12729           next = strchr (type, '+');
12730           if (next)
12731             *next++ = '\0';
12732           if (strcasecmp (type, "jcc") == 0)
12733             align_branch |= align_branch_jcc_bit;
12734           else if (strcasecmp (type, "fused") == 0)
12735             align_branch |= align_branch_fused_bit;
12736           else if (strcasecmp (type, "jmp") == 0)
12737             align_branch |= align_branch_jmp_bit;
12738           else if (strcasecmp (type, "call") == 0)
12739             align_branch |= align_branch_call_bit;
12740           else if (strcasecmp (type, "ret") == 0)
12741             align_branch |= align_branch_ret_bit;
12742           else if (strcasecmp (type, "indirect") == 0)
12743             align_branch |= align_branch_indirect_bit;
12744           else
12745             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
12746           type = next;
12747         }
12748       while (next != NULL);
12749       free (saved);
12750       break;
12751
12752     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
12753       align_branch_power = 5;
12754       align_branch_prefix_size = 5;
12755       align_branch = (align_branch_jcc_bit
12756                       | align_branch_fused_bit
12757                       | align_branch_jmp_bit);
12758       break;
12759
12760     case OPTION_MAMD64:
12761       isa64 = amd64;
12762       break;
12763
12764     case OPTION_MINTEL64:
12765       isa64 = intel64;
12766       break;
12767
12768     case 'O':
12769       if (arg == NULL)
12770         {
12771           optimize = 1;
12772           /* Turn off -Os.  */
12773           optimize_for_space = 0;
12774         }
12775       else if (*arg == 's')
12776         {
12777           optimize_for_space = 1;
12778           /* Turn on all encoding optimizations.  */
12779           optimize = INT_MAX;
12780         }
12781       else
12782         {
12783           optimize = atoi (arg);
12784           /* Turn off -Os.  */
12785           optimize_for_space = 0;
12786         }
12787       break;
12788
12789     default:
12790       return 0;
12791     }
12792   return 1;
12793 }
12794
12795 #define MESSAGE_TEMPLATE \
12796 "                                                                                "
12797
12798 static char *
12799 output_message (FILE *stream, char *p, char *message, char *start,
12800                 int *left_p, const char *name, int len)
12801 {
12802   int size = sizeof (MESSAGE_TEMPLATE);
12803   int left = *left_p;
12804
12805   /* Reserve 2 spaces for ", " or ",\0" */
12806   left -= len + 2;
12807
12808   /* Check if there is any room.  */
12809   if (left >= 0)
12810     {
12811       if (p != start)
12812         {
12813           *p++ = ',';
12814           *p++ = ' ';
12815         }
12816       p = mempcpy (p, name, len);
12817     }
12818   else
12819     {
12820       /* Output the current message now and start a new one.  */
12821       *p++ = ',';
12822       *p = '\0';
12823       fprintf (stream, "%s\n", message);
12824       p = start;
12825       left = size - (start - message) - len - 2;
12826
12827       gas_assert (left >= 0);
12828
12829       p = mempcpy (p, name, len);
12830     }
12831
12832   *left_p = left;
12833   return p;
12834 }
12835
12836 static void
12837 show_arch (FILE *stream, int ext, int check)
12838 {
12839   static char message[] = MESSAGE_TEMPLATE;
12840   char *start = message + 27;
12841   char *p;
12842   int size = sizeof (MESSAGE_TEMPLATE);
12843   int left;
12844   const char *name;
12845   int len;
12846   unsigned int j;
12847
12848   p = start;
12849   left = size - (start - message);
12850   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12851     {
12852       /* Should it be skipped?  */
12853       if (cpu_arch [j].skip)
12854         continue;
12855
12856       name = cpu_arch [j].name;
12857       len = cpu_arch [j].len;
12858       if (*name == '.')
12859         {
12860           /* It is an extension.  Skip if we aren't asked to show it.  */
12861           if (ext)
12862             {
12863               name++;
12864               len--;
12865             }
12866           else
12867             continue;
12868         }
12869       else if (ext)
12870         {
12871           /* It is an processor.  Skip if we show only extension.  */
12872           continue;
12873         }
12874       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
12875         {
12876           /* It is an impossible processor - skip.  */
12877           continue;
12878         }
12879
12880       p = output_message (stream, p, message, start, &left, name, len);
12881     }
12882
12883   /* Display disabled extensions.  */
12884   if (ext)
12885     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12886       {
12887         name = cpu_noarch [j].name;
12888         len = cpu_noarch [j].len;
12889         p = output_message (stream, p, message, start, &left, name,
12890                             len);
12891       }
12892
12893   *p = '\0';
12894   fprintf (stream, "%s\n", message);
12895 }
12896
12897 void
12898 md_show_usage (FILE *stream)
12899 {
12900 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12901   fprintf (stream, _("\
12902   -Qy, -Qn                ignored\n\
12903   -V                      print assembler version number\n\
12904   -k                      ignored\n"));
12905 #endif
12906   fprintf (stream, _("\
12907   -n                      Do not optimize code alignment\n\
12908   -q                      quieten some warnings\n"));
12909 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12910   fprintf (stream, _("\
12911   -s                      ignored\n"));
12912 #endif
12913 #if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12914                       || defined (TE_PE) || defined (TE_PEP))
12915   fprintf (stream, _("\
12916   --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
12917 #endif
12918 #ifdef SVR4_COMMENT_CHARS
12919   fprintf (stream, _("\
12920   --divide                do not treat `/' as a comment character\n"));
12921 #else
12922   fprintf (stream, _("\
12923   --divide                ignored\n"));
12924 #endif
12925   fprintf (stream, _("\
12926   -march=CPU[,+EXTENSION...]\n\
12927                           generate code for CPU and EXTENSION, CPU is one of:\n"));
12928   show_arch (stream, 0, 1);
12929   fprintf (stream, _("\
12930                           EXTENSION is combination of:\n"));
12931   show_arch (stream, 1, 0);
12932   fprintf (stream, _("\
12933   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
12934   show_arch (stream, 0, 0);
12935   fprintf (stream, _("\
12936   -msse2avx               encode SSE instructions with VEX prefix\n"));
12937   fprintf (stream, _("\
12938   -msse-check=[none|error|warning] (default: warning)\n\
12939                           check SSE instructions\n"));
12940   fprintf (stream, _("\
12941   -moperand-check=[none|error|warning] (default: warning)\n\
12942                           check operand combinations for validity\n"));
12943   fprintf (stream, _("\
12944   -mavxscalar=[128|256] (default: 128)\n\
12945                           encode scalar AVX instructions with specific vector\n\
12946                            length\n"));
12947   fprintf (stream, _("\
12948   -mvexwig=[0|1] (default: 0)\n\
12949                           encode VEX instructions with specific VEX.W value\n\
12950                            for VEX.W bit ignored instructions\n"));
12951   fprintf (stream, _("\
12952   -mevexlig=[128|256|512] (default: 128)\n\
12953                           encode scalar EVEX instructions with specific vector\n\
12954                            length\n"));
12955   fprintf (stream, _("\
12956   -mevexwig=[0|1] (default: 0)\n\
12957                           encode EVEX instructions with specific EVEX.W value\n\
12958                            for EVEX.W bit ignored instructions\n"));
12959   fprintf (stream, _("\
12960   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
12961                           encode EVEX instructions with specific EVEX.RC value\n\
12962                            for SAE-only ignored instructions\n"));
12963   fprintf (stream, _("\
12964   -mmnemonic=[att|intel] "));
12965   if (SYSV386_COMPAT)
12966     fprintf (stream, _("(default: att)\n"));
12967   else
12968     fprintf (stream, _("(default: intel)\n"));
12969   fprintf (stream, _("\
12970                           use AT&T/Intel mnemonic\n"));
12971   fprintf (stream, _("\
12972   -msyntax=[att|intel] (default: att)\n\
12973                           use AT&T/Intel syntax\n"));
12974   fprintf (stream, _("\
12975   -mindex-reg             support pseudo index registers\n"));
12976   fprintf (stream, _("\
12977   -mnaked-reg             don't require `%%' prefix for registers\n"));
12978   fprintf (stream, _("\
12979   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
12980 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12981   fprintf (stream, _("\
12982   -mshared                disable branch optimization for shared code\n"));
12983   fprintf (stream, _("\
12984   -mx86-used-note=[no|yes] "));
12985   if (DEFAULT_X86_USED_NOTE)
12986     fprintf (stream, _("(default: yes)\n"));
12987   else
12988     fprintf (stream, _("(default: no)\n"));
12989   fprintf (stream, _("\
12990                           generate x86 used ISA and feature properties\n"));
12991 #endif
12992 #if defined (TE_PE) || defined (TE_PEP)
12993   fprintf (stream, _("\
12994   -mbig-obj               generate big object files\n"));
12995 #endif
12996   fprintf (stream, _("\
12997   -momit-lock-prefix=[no|yes] (default: no)\n\
12998                           strip all lock prefixes\n"));
12999   fprintf (stream, _("\
13000   -mfence-as-lock-add=[no|yes] (default: no)\n\
13001                           encode lfence, mfence and sfence as\n\
13002                            lock addl $0x0, (%%{re}sp)\n"));
13003   fprintf (stream, _("\
13004   -mrelax-relocations=[no|yes] "));
13005   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
13006     fprintf (stream, _("(default: yes)\n"));
13007   else
13008     fprintf (stream, _("(default: no)\n"));
13009   fprintf (stream, _("\
13010                           generate relax relocations\n"));
13011   fprintf (stream, _("\
13012   -malign-branch-boundary=NUM (default: 0)\n\
13013                           align branches within NUM byte boundary\n"));
13014   fprintf (stream, _("\
13015   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
13016                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
13017                            indirect\n\
13018                           specify types of branches to align\n"));
13019   fprintf (stream, _("\
13020   -malign-branch-prefix-size=NUM (default: 5)\n\
13021                           align branches with NUM prefixes per instruction\n"));
13022   fprintf (stream, _("\
13023   -mbranches-within-32B-boundaries\n\
13024                           align branches within 32 byte boundary\n"));
13025   fprintf (stream, _("\
13026   -mamd64                 accept only AMD64 ISA [default]\n"));
13027   fprintf (stream, _("\
13028   -mintel64               accept only Intel64 ISA\n"));
13029 }
13030
13031 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13032      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13033      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13034
/* Pick the target format to use.

   The code size (and, for the x86_64 names, the ELF ABI) is first
   derived from default_arch; an unrecognised name is fatal.  Default ISA
   and tune flags are then filled in if still empty, and finally the BFD
   target name matching OUTPUT_FLAVOR is returned.  Along the way several
   file-level settings (use_rela_relocations, object_64bit, tls_get_addr,
   disallow_64bit_reloc) are set for the chosen format.  */

const char *
i386_target_format (void)
{
  if (!strncmp (default_arch, "x86_64", 6))
    {
      update_code_flag (CODE_64BIT, 1);
      /* Exactly "x86_64" selects the 64-bit ABI; any longer name that
         merely starts with it selects the x32 ABI.  */
      if (default_arch[6] == '\0')
        x86_elf_abi = X86_64_ABI;
      else
        x86_elf_abi = X86_64_X32_ABI;
    }
  else if (!strcmp (default_arch, "i386"))
    update_code_flag (CODE_32BIT, 1);
  else if (!strcmp (default_arch, "iamcu"))
    {
      update_code_flag (CODE_32BIT, 1);
      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
        {
          /* No ISA has been chosen yet: default everything to IAMCU,
             leaving the tune settings alone if they were set
             explicitly.  */
          static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
          cpu_arch_name = "iamcu";
          cpu_sub_arch_name = NULL;
          cpu_arch_flags = iamcu_flags;
          cpu_arch_isa = PROCESSOR_IAMCU;
          cpu_arch_isa_flags = iamcu_flags;
          if (!cpu_arch_tune_set)
            {
              cpu_arch_tune = cpu_arch_isa;
              cpu_arch_tune_flags = cpu_arch_isa_flags;
            }
        }
      else if (cpu_arch_isa != PROCESSOR_IAMCU)
        as_fatal (_("Intel MCU doesn't support `%s' architecture"),
                  cpu_arch_name);
    }
  else
    as_fatal (_("unknown architecture"));

  /* If no ISA or tune flags have been set, take them from the first
     (32-bit code) or second (64-bit code) entry of cpu_arch.  */
  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;

  /* Only the flavours supported by this build have a case below; any
     other flavour reaches the default and aborts.  */
  switch (OUTPUT_FLAVOR)
    {
#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
    case bfd_target_aout_flavour:
      return AOUT_TARGET_FORMAT;
#endif
#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
# if defined (TE_PE) || defined (TE_PEP)
    case bfd_target_coff_flavour:
      if (flag_code == CODE_64BIT)
        return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
      else
        return "pe-i386";
# elif defined (TE_GO32)
    case bfd_target_coff_flavour:
      return "coff-go32";
# else
    case bfd_target_coff_flavour:
      return "coff-i386";
# endif
#endif
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
    case bfd_target_elf_flavour:
      {
        const char *format;

        /* Choose the generic ELF format for the ABI and set the
           ABI-dependent globals.  Both x86_64 ABIs use RELA relocations
           and mark the object as 64-bit; x32 additionally disallows
           64-bit relocations.  */
        switch (x86_elf_abi)
          {
          default:
            format = ELF_TARGET_FORMAT;
#ifndef TE_SOLARIS
            tls_get_addr = "___tls_get_addr";
#endif
            break;
          case X86_64_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            format = ELF_TARGET_FORMAT64;
            break;
          case X86_64_X32_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            disallow_64bit_reloc = 1;
            format = ELF_TARGET_FORMAT32;
            break;
          }
        /* L1OM, K1OM and IAMCU have their own target formats, each
           valid for a single ABI only; they override the generic
           choice made above.  */
        if (cpu_arch_isa == PROCESSOR_L1OM)
          {
            if (x86_elf_abi != X86_64_ABI)
              as_fatal (_("Intel L1OM is 64bit only"));
            return ELF_TARGET_L1OM_FORMAT;
          }
        else if (cpu_arch_isa == PROCESSOR_K1OM)
          {
            if (x86_elf_abi != X86_64_ABI)
              as_fatal (_("Intel K1OM is 64bit only"));
            return ELF_TARGET_K1OM_FORMAT;
          }
        else if (cpu_arch_isa == PROCESSOR_IAMCU)
          {
            if (x86_elf_abi != I386_ABI)
              as_fatal (_("Intel MCU is 32bit only"));
            return ELF_TARGET_IAMCU_FORMAT;
          }
        else
          return format;
      }
#endif
#if defined (OBJ_MACH_O)
    case bfd_target_mach_o_flavour:
      if (flag_code == CODE_64BIT)
        {
          use_rela_relocations = 1;
          object_64bit = 1;
          return "mach-o-x86-64";
        }
      else
        return "mach-o-i386";
#endif
    default:
      abort ();
      return NULL;
    }
}
13169
13170 #endif /* OBJ_MAYBE_ more than one  */
13171 \f
13172 symbolS *
13173 md_undefined_symbol (char *name)
13174 {
13175   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
13176       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
13177       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
13178       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
13179     {
13180       if (!GOT_symbol)
13181         {
13182           if (symbol_find (name))
13183             as_bad (_("GOT already in symbol table"));
13184           GOT_symbol = symbol_new (name, undefined_section,
13185                                    (valueT) 0, &zero_address_frag);
13186         };
13187       return GOT_symbol;
13188     }
13189   return 0;
13190 }
13191
13192 /* Round up a section size to the appropriate boundary.  */
13193
13194 valueT
13195 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
13196 {
13197 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13198   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
13199     {
13200       /* For a.out, force the section size to be aligned.  If we don't do
13201          this, BFD will align it for us, but it will not write out the
13202          final bytes of the section.  This may be a bug in BFD, but it is
13203          easier to fix it here since that is how the other a.out targets
13204          work.  */
13205       int align;
13206
13207       align = bfd_section_alignment (segment);
13208       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
13209     }
13210 #endif
13211
13212   return size;
13213 }
13214
13215 /* On the i386, PC-relative offsets are relative to the start of the
13216    next instruction.  That is, the address of the offset, plus its
13217    size, since the offset is always the last part of the insn.  */
13218
13219 long
13220 md_pcrel_from (fixS *fixP)
13221 {
13222   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
13223 }
13224
13225 #ifndef I386COFF
13226
13227 static void
13228 s_bss (int ignore ATTRIBUTE_UNUSED)
13229 {
13230   int temp;
13231
13232 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13233   if (IS_ELF)
13234     obj_elf_section_change_hook ();
13235 #endif
13236   temp = get_absolute_expression ();
13237   subseg_set (bss_section, (subsegT) temp);
13238   demand_empty_rest_of_line ();
13239 }
13240
13241 #endif
13242
13243 /* Remember constant directive.  */
13244
13245 void
13246 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
13247 {
13248   if (last_insn.kind != last_insn_directive
13249       && (bfd_section_flags (now_seg) & SEC_CODE))
13250     {
13251       last_insn.seg = now_seg;
13252       last_insn.kind = last_insn_directive;
13253       last_insn.name = "constant directive";
13254       last_insn.file = as_where (&last_insn.line);
13255     }
13256 }
13257
13258 void
13259 i386_validate_fix (fixS *fixp)
13260 {
13261   if (fixp->fx_subsy)
13262     {
13263       if (fixp->fx_subsy == GOT_symbol)
13264         {
13265           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
13266             {
13267               if (!object_64bit)
13268                 abort ();
13269 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13270               if (fixp->fx_tcbit2)
13271                 fixp->fx_r_type = (fixp->fx_tcbit
13272                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
13273                                    : BFD_RELOC_X86_64_GOTPCRELX);
13274               else
13275 #endif
13276                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
13277             }
13278           else
13279             {
13280               if (!object_64bit)
13281                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
13282               else
13283                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
13284             }
13285           fixp->fx_subsy = 0;
13286         }
13287     }
13288 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13289   else if (!object_64bit)
13290     {
13291       if (fixp->fx_r_type == BFD_RELOC_386_GOT32
13292           && fixp->fx_tcbit2)
13293         fixp->fx_r_type = BFD_RELOC_386_GOT32X;
13294     }
13295 #endif
13296 }
13297
/* Build the BFD relocation entry for fixup FIXP.

   Returns a newly allocated arelent, or NULL when a size relocation
   against a defined, non-external symbol has been resolved in place
   through md_apply_fix and so needs no relocation.  Problems (an
   unsupported relocation size, a relocation that cannot be represented)
   are reported with as_bad_where and a fallback is used so that
   assembly can continue.  */

arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  /* First decide which BFD relocation code to emit.  */
  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      if (S_IS_DEFINED (fixp->fx_addsy)
          && !S_IS_EXTERNAL (fixp->fx_addsy))
        {
          /* Resolve size relocation against local symbol to size of
             the symbol plus addend.  */
          valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
          if (fixp->fx_r_type == BFD_RELOC_SIZE32
              && !fits_in_unsigned_long (value))
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("symbol size computation overflow"));
          fixp->fx_addsy = NULL;
          fixp->fx_subsy = NULL;
          md_apply_fix (fixp, (valueT *) &value, NULL);
          return NULL;
        }
#endif
      /* Fall through.  */

    /* These relocation types are emitted exactly as requested.  */
    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
        {
          /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
          code = fixp->fx_r_type;
          break;
        }
      /* Fall through.  */
    default:
      /* Everything else becomes a generic relocation chosen purely from
         the size of the fixup and whether it is pc-relative.  An
         unsupported size is reported and treated as 32-bit.  */
      if (fixp->fx_pcrel)
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte pc-relative relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32_PCREL;
              break;
            case 1: code = BFD_RELOC_8_PCREL;  break;
            case 2: code = BFD_RELOC_16_PCREL; break;
            case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64_PCREL; break;
#endif
            }
        }
      else
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32;
              break;
            case 1: code = BFD_RELOC_8;  break;
            case 2: code = BFD_RELOC_16; break;
            case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64; break;
#endif
            }
        }
      break;
    }

  /* A 32-bit relocation whose symbol is the GOT symbol itself is turned
     into the GOTPC relocation for the object size.  */
  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
        code = BFD_RELOC_386_GOTPC;
      else
        code = BFD_RELOC_X86_64_GOTPC32;
    }
  /* Likewise for a 64-bit relocation against the GOT symbol.  */
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  /* Allocate the relocation and point it at the symbol and at the
     position of the fixup (frag address plus offset within the
     frag).  */
  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
         vtable entry to be used in the relocation's section offset.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
        rel->address = fixp->fx_offset;
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
        rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
      rel->addend = 0;
    }
  /* Use the rela in 64bit mode.  */
  else
    {
      /* When 64-bit relocations are disallowed (x32), the relocation
         codes listed here are reported as errors.  */
      if (disallow_64bit_reloc)
        switch (code)
          {
          case BFD_RELOC_X86_64_DTPOFF64:
          case BFD_RELOC_X86_64_TPOFF64:
          case BFD_RELOC_64_PCREL:
          case BFD_RELOC_X86_64_GOTOFF64:
          case BFD_RELOC_X86_64_GOT64:
          case BFD_RELOC_X86_64_GOTPCREL64:
          case BFD_RELOC_X86_64_GOTPC64:
          case BFD_RELOC_X86_64_GOTPLT64:
          case BFD_RELOC_X86_64_PLTOFF64:
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("cannot represent relocation type %s in x32 mode"),
                          bfd_get_reloc_code_name (code));
            break;
          default:
            break;
          }

      /* Compute the explicit addend.  Non-pc-relative fixups use the
         fixup offset directly.  For pc-relative fixups, the relocation
         types listed below use the offset less the field size; all
         others are computed from the section vma, fx_addnumber and
         md_pcrel_from, less the field size.  */
      if (!fixp->fx_pcrel)
        rel->addend = fixp->fx_offset;
      else
        switch (code)
          {
          case BFD_RELOC_X86_64_PLT32:
          case BFD_RELOC_X86_64_GOT32:
          case BFD_RELOC_X86_64_GOTPCREL:
          case BFD_RELOC_X86_64_GOTPCRELX:
          case BFD_RELOC_X86_64_REX_GOTPCRELX:
          case BFD_RELOC_X86_64_TLSGD:
          case BFD_RELOC_X86_64_TLSLD:
          case BFD_RELOC_X86_64_GOTTPOFF:
          case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
          case BFD_RELOC_X86_64_TLSDESC_CALL:
            rel->addend = fixp->fx_offset - fixp->fx_size;
            break;
          default:
            rel->addend = (section->vma
                           - fixp->fx_size
                           + fixp->fx_addnumber
                           + md_pcrel_from (fixp));
            break;
          }
    }

  /* Look up how the output BFD represents the chosen relocation.  */
  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("cannot represent relocation type %s"),
                    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}
13518
13519 #include "tc-i386-intel.c"
13520
13521 void
13522 tc_x86_parse_to_dw2regnum (expressionS *exp)
13523 {
13524   int saved_naked_reg;
13525   char saved_register_dot;
13526
13527   saved_naked_reg = allow_naked_reg;
13528   allow_naked_reg = 1;
13529   saved_register_dot = register_chars['.'];
13530   register_chars['.'] = '.';
13531   allow_pseudo_reg = 1;
13532   expression_and_evaluate (exp);
13533   allow_pseudo_reg = 0;
13534   register_chars['.'] = saved_register_dot;
13535   allow_naked_reg = saved_naked_reg;
13536
13537   if (exp->X_op == O_register && exp->X_add_number >= 0)
13538     {
13539       if ((addressT) exp->X_add_number < i386_regtab_size)
13540         {
13541           exp->X_op = O_constant;
13542           exp->X_add_number = i386_regtab[exp->X_add_number]
13543                               .dw2_regnum[flag_code >> 1];
13544         }
13545       else
13546         exp->X_op = O_illegal;
13547     }
13548 }
13549
13550 void
13551 tc_x86_frame_initial_instructions (void)
13552 {
13553   static unsigned int sp_regno[2];
13554
13555   if (!sp_regno[flag_code >> 1])
13556     {
13557       char *saved_input = input_line_pointer;
13558       char sp[][4] = {"esp", "rsp"};
13559       expressionS exp;
13560
13561       input_line_pointer = sp[flag_code >> 1];
13562       tc_x86_parse_to_dw2regnum (&exp);
13563       gas_assert (exp.X_op == O_constant);
13564       sp_regno[flag_code >> 1] = exp.X_add_number;
13565       input_line_pointer = saved_input;
13566     }
13567
13568   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
13569   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
13570 }
13571
13572 int
13573 x86_dwarf2_addr_size (void)
13574 {
13575 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13576   if (x86_elf_abi == X86_64_X32_ABI)
13577     return 4;
13578 #endif
13579   return bfd_arch_bits_per_address (stdoutput) / 8;
13580 }
13581
13582 int
13583 i386_elf_section_type (const char *str, size_t len)
13584 {
13585   if (flag_code == CODE_64BIT
13586       && len == sizeof ("unwind") - 1
13587       && strncmp (str, "unwind", 6) == 0)
13588     return SHT_X86_64_UNWIND;
13589
13590   return -1;
13591 }
13592
13593 #ifdef TE_SOLARIS
13594 void
13595 i386_solaris_fix_up_eh_frame (segT sec)
13596 {
13597   if (flag_code == CODE_64BIT)
13598     elf_section_type (sec) = SHT_X86_64_UNWIND;
13599 }
13600 #endif
13601
13602 #ifdef TE_PE
13603 void
13604 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
13605 {
13606   expressionS exp;
13607
13608   exp.X_op = O_secrel;
13609   exp.X_add_symbol = symbol;
13610   exp.X_add_number = 0;
13611   emit_expr (&exp, size);
13612 }
13613 #endif
13614
13615 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13616 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
13617
13618 bfd_vma
13619 x86_64_section_letter (int letter, const char **ptr_msg)
13620 {
13621   if (flag_code == CODE_64BIT)
13622     {
13623       if (letter == 'l')
13624         return SHF_X86_64_LARGE;
13625
13626       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
13627     }
13628   else
13629     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
13630   return -1;
13631 }
13632
13633 bfd_vma
13634 x86_64_section_word (char *str, size_t len)
13635 {
13636   if (len == 5 && flag_code == CODE_64BIT && CONST_STRNEQ (str, "large"))
13637     return SHF_X86_64_LARGE;
13638
13639   return -1;
13640 }
13641
13642 static void
13643 handle_large_common (int small ATTRIBUTE_UNUSED)
13644 {
13645   if (flag_code != CODE_64BIT)
13646     {
13647       s_comm_internal (0, elf_common_parse);
13648       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
13649     }
13650   else
13651     {
13652       static segT lbss_section;
13653       asection *saved_com_section_ptr = elf_com_section_ptr;
13654       asection *saved_bss_section = bss_section;
13655
13656       if (lbss_section == NULL)
13657         {
13658           flagword applicable;
13659           segT seg = now_seg;
13660           subsegT subseg = now_subseg;
13661
13662           /* The .lbss section is for local .largecomm symbols.  */
13663           lbss_section = subseg_new (".lbss", 0);
13664           applicable = bfd_applicable_section_flags (stdoutput);
13665           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
13666           seg_info (lbss_section)->bss = 1;
13667
13668           subseg_set (seg, subseg);
13669         }
13670
13671       elf_com_section_ptr = &_bfd_elf_large_com_section;
13672       bss_section = lbss_section;
13673
13674       s_comm_internal (0, elf_common_parse);
13675
13676       elf_com_section_ptr = saved_com_section_ptr;
13677       bss_section = saved_bss_section;
13678     }
13679 }
13680 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */