gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2020 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus ([email protected]).
  23    x86_64 support by Jan Hubicka ([email protected])
  24    VIA PadLock support by Michal Ludvig ([email protected])
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "elf/x86-64.h"
  34 #include "opcodes/i386-init.h"
  35
  36 #ifdef HAVE_LIMITS_H
  37 #include <limits.h>
  38 #else
  39 #ifdef HAVE_SYS_PARAM_H
  40 #include <sys/param.h>
  41 #endif
  42 #ifndef INT_MAX
  43 #define INT_MAX (int) (((unsigned) (-1)) >> 1)
  44 #endif
  45 #endif
  46
  47 #ifndef INFER_ADDR_PREFIX
  48 #define INFER_ADDR_PREFIX 1
  49 #endif
  50
  51 #ifndef DEFAULT_ARCH
  52 #define DEFAULT_ARCH "i386"
  53 #endif
  54
  55 #ifndef INLINE
  56 #if __GNUC__ >= 2
  57 #define INLINE __inline__
  58 #else
  59 #define INLINE
  60 #endif
  61 #endif
  62
  63 /* Prefixes will be emitted in the order defined below.
  64    WAIT_PREFIX must be the first prefix since FWAIT really is an
  65    instruction, and so must come before any prefixes.
  66    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  67    REP_PREFIX/HLE_PREFIX/BND_PREFIX (one shared slot), LOCK_PREFIX.  */
  68 #define WAIT_PREFIX     0
  69 #define SEG_PREFIX      1
  70 #define ADDR_PREFIX     2
  71 #define DATA_PREFIX     3
  72 #define REP_PREFIX      4
  73 #define HLE_PREFIX      REP_PREFIX      /* same slot as REP_PREFIX */
  74 #define BND_PREFIX      REP_PREFIX      /* same slot as REP_PREFIX */
  75 #define LOCK_PREFIX     5
  76 #define REX_PREFIX      6       /* must come last.  */
  77 #define MAX_PREFIXES    7       /* number of prefix slots; sizes prefix[] in struct _i386_insn */
  78
  79 /* we define the syntax here (modulo base,index,scale syntax) */
  80 #define REGISTER_PREFIX '%'
  81 #define IMMEDIATE_PREFIX '$'
  82 #define ABSOLUTE_PREFIX '*'
  83
  84 /* These are the instruction mnemonic suffixes in AT&T syntax, or the
  85    memory operand sizes in Intel syntax.  */
  86 #define WORD_MNEM_SUFFIX  'w'
  87 #define BYTE_MNEM_SUFFIX  'b'
  88 #define SHORT_MNEM_SUFFIX 's'
  89 #define LONG_MNEM_SUFFIX  'l'
  90 #define QWORD_MNEM_SUFFIX  'q'
  91 /* Intel syntax.  Use a non-printable character (not a letter) since it
  92    never appears in instructions.  */
  93 #define LONG_DOUBLE_MNEM_SUFFIX '\1'
  94
  95 #define END_OF_INSN '\0'
  96
  97 /* This matches the C -> StaticRounding alias in the opcode table.  */
  98 #define commutative staticrounding
  99
 100 /*
 101   'templates' is for grouping together 'template' structures for opcodes
 102   of the same name.  This is only used for storing the insns in the grand
 103   ole hash table of insns.
 104   The templates themselves start at START and range up to (but not including)
 105   END.
 106   */
 107 typedef struct
 108 {
 109   const insn_template *start;
 110   const insn_template *end;
 111 }
 112 templates;
 113
 114 /* 386 operand encoding bytes:  see 386 book for details of this.  */
 115 typedef struct
 116 {
 117   unsigned int regmem;  /* codes register or memory operand */
 118   unsigned int reg;     /* codes register operand (or extended opcode) */
 119   unsigned int mode;    /* how to interpret regmem & reg */
 120 }
 121 modrm_byte;
 122
 123 /* x86-64 extension prefix.  */
 124 typedef int rex_byte;
 125
 126 /* 386 opcode byte to code indirect addressing.  */
 127 typedef struct
 128 {
 129   unsigned base;
 130   unsigned index;
 131   unsigned scale;
 132 }
 133 sib_byte;
 134
 135 /* x86 arch names, types and features */
 136 typedef struct
 137 {
 138   const char *name;             /* arch name */
 139   unsigned int len;             /* arch string length */
 140   enum processor_type type;     /* arch type */
 141   i386_cpu_flags flags;         /* cpu feature flags */
 142   unsigned int skip;            /* show_arch should skip this. */
 143 }
 144 arch_entry;
 145
 146 /* Used to turn off indicated flags.  */
 147 typedef struct
 148 {
 149   const char *name;             /* arch name */
 150   unsigned int len;             /* arch string length */
 151   i386_cpu_flags flags;         /* cpu feature flags */
 152 }
 153 noarch_entry;
 154
 155 static void update_code_flag (int, int);
 156 static void set_code_flag (int);
 157 static void set_16bit_gcc_code_flag (int);
 158 static void set_intel_syntax (int);
 159 static void set_intel_mnemonic (int);
 160 static void set_allow_index_reg (int);
 161 static void set_check (int);
 162 static void set_cpu_arch (int);
 163 #ifdef TE_PE
 164 static void pe_directive_secrel (int);
 165 #endif
 166 static void signed_cons (int);
 167 static char *output_invalid (int c);
 168 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 169                                     const char *);
 170 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 171                                        const char *);
 172 static int i386_att_operand (char *);
 173 static int i386_intel_operand (char *, int);
 174 static int i386_intel_simplify (expressionS *);
 175 static int i386_intel_parse_name (const char *, expressionS *);
 176 static const reg_entry *parse_register (char *, char **);
 177 static char *parse_insn (char *, char *);
 178 static char *parse_operands (char *, const char *);
 179 static void swap_operands (void);
 180 static void swap_2_operands (int, int);
 181 static enum flag_code i386_addressing_mode (void);
 182 static void optimize_imm (void);
 183 static void optimize_disp (void);
 184 static const insn_template *match_template (char);
 185 static int check_string (void);
 186 static int process_suffix (void);
 187 static int check_byte_reg (void);
 188 static int check_long_reg (void);
 189 static int check_qword_reg (void);
 190 static int check_word_reg (void);
 191 static int finalize_imm (void);
 192 static int process_operands (void);
 193 static const seg_entry *build_modrm_byte (void);
 194 static void output_insn (void);
 195 static void output_imm (fragS *, offsetT);
 196 static void output_disp (fragS *, offsetT);
 197 #ifndef I386COFF
 198 static void s_bss (int);
 199 #endif
 200 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 201 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 202
 203 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 204 static unsigned int x86_isa_1_used;
 205 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 206 static unsigned int x86_feature_2_used;
 207 /* Generate x86 used ISA and feature properties.  */
 208 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 209 #endif
 210
 211 static const char *default_arch = DEFAULT_ARCH;
 212
 213 /* This struct describes rounding control and SAE in the instruction.  */
 214 struct RC_Operation
 215 {
 216   enum rc_type
 217     {
 218       rne = 0,  /* "rn-sae" in RC_NamesTable */
 219       rd,  /* "rd-sae" in RC_NamesTable */
 220       ru,  /* "ru-sae" in RC_NamesTable */
 221       rz,  /* "rz-sae" in RC_NamesTable */
 222       saeonly  /* "sae" in RC_NamesTable */
 223     } type;
 224   int operand;  /* presumably the index of the operand this applies to -- confirm */
 225 };
 226
 227 static struct RC_Operation rc_op;
 228
 229 /* The struct describes masking, applied to OPERAND in the instruction.
 230    MASK is a pointer to the corresponding mask register.  ZEROING tells
 231    whether a merging or a zeroing mask is used.  */
 232 struct Mask_Operation
 233 {
 234   const reg_entry *mask;
 235   unsigned int zeroing;
 236   /* The operand with which this operation is associated.  */
 237   int operand;
 238 };
 239
 240 static struct Mask_Operation mask_op;
 241
 242 /* The struct describes broadcasting, applied to OPERAND.  TYPE is the
 243    broadcast form and BYTES is the number of bytes to broadcast.  */
 244 struct Broadcast_Operation
 245 {
 246   /* Type of broadcast: {1to2}, {1to4}, {1to8}, or {1to16}.  */
 247   int type;
 248
 249   /* Index of the broadcast operand.  */
 250   int operand;
 251
 252   /* Number of bytes to broadcast.  */
 253   int bytes;
 254 };
 255
 256 static struct Broadcast_Operation broadcast_op;
 257
 258 /* VEX or EVEX prefix.  */
 259 typedef struct
 260 {
 261   /* A VEX prefix is either 2 or 3 bytes long; an EVEX prefix is 4 bytes.  */
 262   unsigned char bytes[4];
 263   unsigned int length;  /* presumably how many entries of BYTES are in use -- confirm */
 264   /* Destination or source register specifier.  */
 265   const reg_entry *register_specifier;
 266 } vex_prefix;
 267
 268 /* 'md_assemble ()' gathers together information and puts it into an
 269    i386_insn.  */
 270
 271 union i386_op
 272   {
 273     expressionS *disps;
 274     expressionS *imms;
 275     const reg_entry *regs;
 276   };
 277
 278 enum i386_error
 279   {
 280     operand_size_mismatch,
 281     operand_type_mismatch,
 282     register_type_mismatch,
 283     number_of_operands_mismatch,
 284     invalid_instruction_suffix,
 285     bad_imm4,
 286     unsupported_with_intel_mnemonic,
 287     unsupported_syntax,
 288     unsupported,
 289     invalid_vsib_address,
 290     invalid_vector_register_set,
 291     unsupported_vector_index_register,
 292     unsupported_broadcast,
 293     broadcast_needed,
 294     unsupported_masking,
 295     mask_not_on_destination,
 296     no_default_mask,
 297     unsupported_rc_sae,
 298     rc_sae_operand_not_last_imm,
 299     invalid_register_operand,
 300   };
 301
 302 struct _i386_insn
 303   {
 304     /* TM holds the template for the insn we're currently assembling.  */
 305     insn_template tm;
 306
 307     /* SUFFIX holds the instruction size suffix for byte, word, dword
 308        or qword, if given.  */
 309     char suffix;
 310
 311     /* OPERANDS gives the number of given operands.  */
 312     unsigned int operands;
 313
 314     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 315        of given register, displacement, memory and immediate operands
 316        respectively.  */
 317     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 318
 319     /* TYPES [i] is the type (see above #defines) which tells us how to
 320        use OP[i] for the corresponding operand.  */
 321     i386_operand_type types[MAX_OPERANDS];
 322
 323     /* Displacement expression, immediate expression, or register for each
 324        operand.  */
 325     union i386_op op[MAX_OPERANDS];
 326
 327     /* Flags for operands.  */
 328     unsigned int flags[MAX_OPERANDS];
 329 #define Operand_PCrel 1
 330 #define Operand_Mem   2
 331
 332     /* Relocation type for each operand.  */
 333     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 334
 335     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 336        the base index byte below.  */
 337     const reg_entry *base_reg;
 338     const reg_entry *index_reg;
 339     unsigned int log2_scale_factor;
 340
 341     /* SEG gives the seg_entries of this insn.  They are zero unless
 342        explicit segment overrides are given.  */
 343     const seg_entry *seg[2];
 344
 345     /* Copied first memory operand string, for re-checking.  */
 346     char *memop1_string;
 347
 348     /* PREFIX holds all the given prefix opcodes (usually null).
 349        PREFIXES is the number of prefix opcodes.  */
 350     unsigned int prefixes;
 351     unsigned char prefix[MAX_PREFIXES];
 352
 353     /* Register is in low 3 bits of opcode.  */
 354     bfd_boolean short_form;
 355
 356     /* The operand to a branch insn indicates an absolute branch.  */
 357     bfd_boolean jumpabsolute;
 358
 359     /* Has MMX register operands.  */
 360     bfd_boolean has_regmmx;
 361
 362     /* Has XMM register operands.  */
 363     bfd_boolean has_regxmm;
 364
 365     /* Has YMM register operands.  */
 366     bfd_boolean has_regymm;
 367
 368     /* Has ZMM register operands.  */
 369     bfd_boolean has_regzmm;
 370
 371     /* Has GOTPC or TLS relocation.  */
 372     bfd_boolean has_gotpc_tls_reloc;
 373
 374     /* RM and SIB are the modrm byte and the sib byte where the
 375        addressing modes of this insn are encoded.  */
 376     modrm_byte rm;
 377     rex_byte rex;
 378     rex_byte vrex;
 379     sib_byte sib;
 380     vex_prefix vex;
 381
 382     /* Masking attributes.  */
 383     struct Mask_Operation *mask;
 384
 385     /* Rounding control and SAE attributes.  */
 386     struct RC_Operation *rounding;
 387
 388     /* Broadcasting attributes.  */
 389     struct Broadcast_Operation *broadcast;
 390
 391     /* Compressed disp8*N attribute.  */
 392     unsigned int memshift;
 393
 394     /* Prefer load or store in encoding.  */
 395     enum
 396       {
 397         dir_encoding_default = 0,
 398         dir_encoding_load,
 399         dir_encoding_store,
 400         dir_encoding_swap
 401       } dir_encoding;
 402
 403     /* Prefer 8bit or 32bit displacement in encoding.  */
 404     enum
 405       {
 406         disp_encoding_default = 0,
 407         disp_encoding_8bit,
 408         disp_encoding_32bit
 409       } disp_encoding;
 410
 411     /* Prefer the REX byte in encoding.  */
 412     bfd_boolean rex_encoding;
 413
 414     /* Disable instruction size optimization.  */
 415     bfd_boolean no_optimize;
 416
 417     /* How to encode vector instructions.  */
 418     enum
 419       {
 420         vex_encoding_default = 0,
 421         vex_encoding_vex,
 422         vex_encoding_vex3,
 423         vex_encoding_evex
 424       } vec_encoding;
 425
 426     /* REP prefix.  */
 427     const char *rep_prefix;
 428
 429     /* HLE prefix.  */
 430     const char *hle_prefix;
 431
 432     /* Have BND prefix.  */
 433     const char *bnd_prefix;
 434
 435     /* Have NOTRACK prefix.  */
 436     const char *notrack_prefix;
 437
 438     /* Error kind; one of enum i386_error.  */
 439     enum i386_error error;
 440   };
 441
 442 typedef struct _i386_insn i386_insn;
 443
 444 /* Link RC type with corresponding string, that'll be looked for in
 445    asm.  */
 446 struct RC_name
 447 {
 448   enum rc_type type;
 449   const char *name;
 450   unsigned int len;
 451 };
 452
 453 static const struct RC_name RC_NamesTable[] =
 454 {
 455   {  rne, STRING_COMMA_LEN ("rn-sae") },
 456   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 457   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 458   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 459   {  saeonly,  STRING_COMMA_LEN ("sae") },
 460 };
 461
 462 /* List of chars besides those in app.c:symbol_chars that can start an
 463    operand.  Used to prevent the scrubber eating vital white-space.  */
 464 const char extra_symbol_chars[] = "*%-([{}"
 465 #ifdef LEX_AT
 466         "@"
 467 #endif
 468 #ifdef LEX_QM
 469         "?"
 470 #endif
 471         ;
 472
 473 #if (defined (TE_I386AIX)                               \
 474      || ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
 475          && !defined (TE_GNU)                           \
 476          && !defined (TE_LINUX)                         \
 477          && !defined (TE_NACL)                          \
 478          && !defined (TE_FreeBSD)                       \
 479          && !defined (TE_DragonFly)                     \
 480          && !defined (TE_NetBSD)))
 481 /* This array holds the chars that always start a comment.  If the
 482    pre-processor is disabled, these aren't very useful.  The option
 483    --divide will remove '/' from this list.  */
 484 const char *i386_comment_chars = "#/";
 485 #define SVR4_COMMENT_CHARS 1
 486 #define PREFIX_SEPARATOR '\\'
 487
 488 #else
 489 const char *i386_comment_chars = "#";
 490 #define PREFIX_SEPARATOR '/'
 491 #endif
 492
 493 /* This array holds the chars that only start a comment at the beginning of
 494    a line.  If the line seems to have the form '# 123 filename'
 495    .line and .file directives will appear in the pre-processed output.
 496    Note that input_file.c hand checks for '#' at the beginning of the
 497    first line of the input file.  This is because the compiler outputs
 498    #NO_APP at the beginning of its output.
 499    Also note that comments started like this one will always work if
 500    '/' isn't otherwise defined.  */
 501 const char line_comment_chars[] = "#/";
 502
 503 const char line_separator_chars[] = ";";
 504
 505 /* Chars that can be used to separate mant from exp in floating point
 506    nums.  */
 507 const char EXP_CHARS[] = "eE";
 508
 509 /* Chars that mean this number is a floating point constant
 510    As in 0f12.456
 511    or    0d1.2345e12.  */
 512 const char FLT_CHARS[] = "fFdDxX";
 513
 514 /* Tables for lexical analysis.  */
 515 static char mnemonic_chars[256];
 516 static char register_chars[256];
 517 static char operand_chars[256];
 518 static char identifier_chars[256];
 519 static char digit_chars[256];
 520
 521 /* Lexical macros.  The cast makes the argument a non-negative table index.  */
 522 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) (x)])
 523 #define is_operand_char(x) (operand_chars[(unsigned char) (x)])
 524 #define is_register_char(x) (register_chars[(unsigned char) (x)])
 525 #define is_space_char(x) ((x) == ' ')
 526 #define is_identifier_char(x) (identifier_chars[(unsigned char) (x)])
 527 #define is_digit_char(x) (digit_chars[(unsigned char) (x)])
 528
 529 /* All non-digit non-letter characters that may occur in an operand.  */
 530 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 531
 532 /* md_assemble() always leaves the strings it's passed unaltered.  To
 533    effect this we maintain a stack of the characters that we've overwritten
 534    with '\0' (marking the end of a sub-field of the assembler
 535    instruction) so that they can be put back afterwards.  */
 536 static char save_stack[32];  /* Fixed size; the macros below do no bounds checks.  */
 537 static char *save_stack_p;  /* Post-incremented on save, pre-decremented on restore.  */
 538 #define END_STRING_AND_SAVE(s) \
 539         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
 540 #define RESTORE_END_STRING(s) \
 541         do { *(s) = *--save_stack_p; } while (0)
 542
 543 /* The instruction we're assembling.  */
 544 static i386_insn i;
 545
 546 /* Possible templates for current insn.  */
 547 static const templates *current_templates;
 548
 549 /* Per instruction expressionS buffers: max displacements & immediates.  */
 550 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 551 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 552
 553 /* Current operand we are working on.  */
 554 static int this_operand = -1;
 555
 556 /* We support four different modes.  FLAG_CODE variable is used to distinguish
 557    these.  */
 558
 559 enum flag_code {
 560         CODE_32BIT,
 561         CODE_16BIT,
 562         CODE_64BIT };
 563
 564 static enum flag_code flag_code;
 565 static unsigned int object_64bit;
 566 static unsigned int disallow_64bit_reloc;
 567 static int use_rela_relocations = 0;
 568 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 569 static const char *tls_get_addr;
 570
 571 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 572      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 573      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 574
 575 /* The ELF ABI to use.  */
 576 enum x86_elf_abi
 577 {
 578   I386_ABI,
 579   X86_64_ABI,
 580   X86_64_X32_ABI
 581 };
 582
 583 static enum x86_elf_abi x86_elf_abi = I386_ABI;
 584 #endif
 585
 586 #if defined (TE_PE) || defined (TE_PEP)
 587 /* Use big object file format.  */
 588 static int use_big_obj = 0;
 589 #endif
 590
 591 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 592 /* 1 if generating code for a shared library.  */
 593 static int shared = 0;
 594 #endif
 595
 596 /* 1 for intel syntax,
 597    0 if att syntax.  */
 598 static int intel_syntax = 0;
 599
 600 static enum x86_64_isa
 601 {
 602   amd64 = 1,    /* AMD64 ISA.  */
 603   intel64       /* Intel64 ISA.  */
 604 } isa64;  /* Zero-initialized, so it matches neither enumerator until assigned.  */
 605
 606 /* 1 for intel mnemonic,
 607    0 if att mnemonic.  */
 608 static int intel_mnemonic = !SYSV386_COMPAT;
 609
 610 /* 1 if pseudo registers are permitted.  */
 611 static int allow_pseudo_reg = 0;
 612
 613 /* 1 if register prefix % not required.  */
 614 static int allow_naked_reg = 0;
 615
 616 /* 1 if the assembler should add BND prefix for all control-transferring
 617    instructions supporting it, even if this prefix wasn't specified
 618    explicitly.  */
 619 static int add_bnd_prefix = 0;
 620
 621 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 622 static int allow_index_reg = 0;
 623
 624 /* 1 if the assembler should ignore LOCK prefix, even if it was
 625    specified explicitly.  */
 626 static int omit_lock_prefix = 0;
 627
 628 /* 1 if the assembler should encode lfence, mfence, and sfence as
 629    "lock addl $0, (%{re}sp)".  */
 630 static int avoid_fence = 0;
 631
 632 /* Type of the previous instruction.  */
 633 static struct
 634   {
 635     segT seg;
 636     const char *file;
 637     const char *name;
 638     unsigned int line;
 639     enum last_insn_kind
 640       {
 641         last_insn_other = 0,
 642         last_insn_directive,
 643         last_insn_prefix
 644       } kind;
 645   } last_insn;
 646
 647 /* 1 if the assembler should generate relax relocations.  */
 648
 649 static int generate_relax_relocations
 650   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 651
 652 static enum check_kind
 653   {
 654     check_none = 0,
 655     check_warning,
 656     check_error
 657   }
 658 sse_check, operand_check = check_warning;  /* sse_check is zero-initialized, i.e. check_none.  */
 659
 660 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 661 static int align_branch_power = 0;
 662
 663 /* Types of branches to align.  */
 664 enum align_branch_kind
 665   {
 666     align_branch_none = 0,
 667     align_branch_jcc = 1,
 668     align_branch_fused = 2,
 669     align_branch_jmp = 3,
 670     align_branch_call = 4,
 671     align_branch_indirect = 5,
 672     align_branch_ret = 6
 673   };
 674
 675 /* Type bits of branches to align.  */
 676 enum align_branch_bit
 677   {
 678     align_branch_jcc_bit = 1 << align_branch_jcc,
 679     align_branch_fused_bit = 1 << align_branch_fused,
 680     align_branch_jmp_bit = 1 << align_branch_jmp,
 681     align_branch_call_bit = 1 << align_branch_call,
 682     align_branch_indirect_bit = 1 << align_branch_indirect,
 683     align_branch_ret_bit = 1 << align_branch_ret
 684   };
 685
 686 static unsigned int align_branch = (align_branch_jcc_bit
 687                                     | align_branch_fused_bit
 688                                     | align_branch_jmp_bit);
 689
 690 /* Conditional jump kinds for macro-fusion; kind N is base opcode 0x70 + 2*N.  */
 691 enum mf_jcc_kind
 692   {
 693     mf_jcc_jo = 0,  /* base opcode 0x70  */
 694     mf_jcc_jc,      /* base opcode 0x72  */
 695     mf_jcc_je,      /* base opcode 0x74  */
 696     mf_jcc_jna,     /* base opcode 0x76  */
 697     mf_jcc_js,      /* base opcode 0x78  */
 698     mf_jcc_jp,      /* base opcode 0x7a  */
 699     mf_jcc_jl,      /* base opcode 0x7c  */
 700     mf_jcc_jle,     /* base opcode 0x7e  */
 701   };
 702
 703 /* Types of compare flag-modifying instructions used by macro-fusion.  */
 704 enum mf_cmp_kind
 705   {
 706     mf_cmp_test_and,  /* test/and -- NOTE(review): comment used to say test/cmp; confirm */
 707     mf_cmp_alu_cmp,  /* add/sub/cmp */
 708     mf_cmp_incdec  /* inc/dec */
 709   };
 710
 711 /* The maximum padding size for fused jcc.  CMP like instruction can
 712    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 713    prefixes.   */
 714 #define MAX_FUSED_JCC_PADDING_SIZE 20
 715
 716 /* The maximum number of prefixes added for an instruction.  */
 717 static unsigned int align_branch_prefix_size = 5;
 718
 719 /* Optimization:
 720    1. Clear the REX_W bit with register operand if possible.
 721    2. Above plus use 128bit vector instruction to clear the full vector
 722       register.
 723  */
 724 static int optimize = 0;
 725
 726 /* Optimization:
 727    1. Clear the REX_W bit with register operand if possible.
 728    2. Above plus use 128bit vector instruction to clear the full vector
 729       register.
 730    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 731       "testb $imm7,%r8".
 732  */
 733 static int optimize_for_space = 0;
 734
 735 /* Register prefix used for error message.  */
 736 static const char *register_prefix = "%";
 737
 738 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 739    leave, push, and pop instructions so that gcc has the same stack
 740    frame as in 32 bit mode.  */
 741 static char stackop_size = '\0';
 742
 743 /* Non-zero to optimize code alignment.  */
 744 int optimize_align_code = 1;
 745
 746 /* Non-zero to quieten some warnings.  */
 747 static int quiet_warnings = 0;
 748
 749 /* CPU name.  */
 750 static const char *cpu_arch_name = NULL;
 751 static char *cpu_sub_arch_name = NULL;
 752
 753 /* CPU feature flags.  */
 754 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 755
 756 /* If we have selected a cpu we are generating instructions for.  */
 757 static int cpu_arch_tune_set = 0;
 758
 759 /* Cpu we are generating instructions for.  */
 760 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 761
 762 /* CPU feature flags of cpu we are generating instructions for.  */
 763 static i386_cpu_flags cpu_arch_tune_flags;
 764
 765 /* CPU instruction set architecture used.  */
 766 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 767
 768 /* CPU feature flags of instruction set architecture used.  */
 769 i386_cpu_flags cpu_arch_isa_flags;
 770
 771 /* If set, conditional jumps are not automatically promoted to handle
 772    larger than a byte offset.  */
 773 static unsigned int no_cond_jump_promotion = 0;
 774
 775 /* Encode SSE instructions with VEX prefix.  */
 776 static unsigned int sse2avx;
 777
 778 /* Encode scalar AVX instructions with specific vector length.  */
 779 static enum
 780   {
 781     vex128 = 0,
 782     vex256
 783   } avxscalar;
 784
 785 /* Encode VEX WIG instructions with specific vex.w.  */
 786 static enum
 787   {
 788     vexw0 = 0,
 789     vexw1
 790   } vexwig;
 791
 792 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 793 static enum
 794   {
 795     evexl128 = 0,
 796     evexl256,
 797     evexl512
 798   } evexlig;
 799
 800 /* Encode EVEX WIG instructions with specific evex.w.  */
 801 static enum
 802   {
 803     evexw0 = 0,
 804     evexw1
 805   } evexwig;
 806
 807 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
 808 static enum rc_type evexrcig = rne;
 809
 810 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 811 static symbolS *GOT_symbol;
 812
 813 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 814 unsigned int x86_dwarf2_return_column;
 815
 816 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 817 int x86_cie_data_alignment;
 818
 819 /* Interface to relax_segment.
 820    There are 3 major relax states for 386 jump insns because the
 821    different types of jumps add different sizes to frags when we're
 822    figuring out what sort of jump to choose to reach a given label.
 823
 824    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 825    branches which are handled by md_estimate_size_before_relax() and
 826    i386_generic_table_relax_frag().  */
 827
 828 /* Types.  */
 829 #define UNCOND_JUMP 0
 830 #define COND_JUMP 1
 831 #define COND_JUMP86 2
 832 #define BRANCH_PADDING 3
 833 #define BRANCH_PREFIX 4
 834 #define FUSED_JCC_PADDING 5
 835
 836 /* Sizes.  */
 837 #define CODE16  1
 838 #define SMALL   0
 839 #define SMALL16 (SMALL | CODE16)
 840 #define BIG     2
 841 #define BIG16   (BIG | CODE16)
 842
 843 #ifndef INLINE  /* No-op here: INLINE is always defined near the top of this file.  */
 844 #ifdef __GNUC__
 845 #define INLINE __inline__
 846 #else
 847 #define INLINE
 848 #endif
 849 #endif
 850
 851 #define ENCODE_RELAX_STATE(type, size) \
 852   ((relax_substateT) (((type) << 2) | (size)))
 853 #define TYPE_FROM_RELAX_STATE(s) \
 854   ((s) >> 2)
 855 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 856     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 857
 858 /* This table is used by relax_frag to promote short jumps to long
 859    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 860    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 861    don't allow a short jump in a 32 bit code segment to be promoted to
 862    a 16 bit offset jump because it's slower (requires data size
 863    prefix), and doesn't work, unless the destination is in the bottom
 864    64k of the code segment (The top 16 bits of eip are zeroed).  */
 865
 866 const relax_typeS md_relax_table[] =
 867 {
 868   /* The fields are:
 869      1) most positive reach of this state,
 870      2) most negative reach of this state,
 871      3) how many bytes this mode will have in the variable part of the frag
 872      4) which index into the table to try if we can't fit into this one.  */
 873
 874   /* UNCOND_JUMP states, in size order SMALL, SMALL16, BIG, BIG16.  */
 875   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 876   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 877   /* dword jmp adds 4 bytes to frag:
 878      0 extra opcode bytes, 4 displacement bytes.  */
 879   {0, 0, 4, 0},
 880   /* word jmp adds 2 bytes to frag:
 881      0 extra opcode bytes, 2 displacement bytes.  */
 882   {0, 0, 2, 0},
 883
 884   /* COND_JUMP states, in the same size order.  */
 885   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 886   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 887   /* dword conditionals add 5 bytes to frag:
 888      1 extra opcode byte, 4 displacement bytes.  */
 889   {0, 0, 5, 0},
 890   /* word conditionals add 3 bytes to frag:
 891      1 extra opcode byte, 2 displacement bytes.  */
 892   {0, 0, 3, 0},
 893
 894   /* COND_JUMP86 states, in the same size order.  */
 895   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 896   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 897   /* dword conditionals add 5 bytes to frag:
 898      1 extra opcode byte, 4 displacement bytes.  */
 899   {0, 0, 5, 0},
 900   /* word conditionals add 4 bytes to frag:
 901      1 displacement byte and a 3 byte long branch insn.  */
 902   {0, 0, 4, 0}
 903 };
 904
 905 static const arch_entry cpu_arch[] =
 906 {
 907   /* Do not replace the first two entries - i386_target_format()
 908      relies on them being there in this order.  */
 909   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
 910     CPU_GENERIC32_FLAGS, 0 },
 911   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
 912     CPU_GENERIC64_FLAGS, 0 },
 913   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
 914     CPU_NONE_FLAGS, 0 },
 915   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
 916     CPU_I186_FLAGS, 0 },
 917   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
 918     CPU_I286_FLAGS, 0 },
 919   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
 920     CPU_I386_FLAGS, 0 },
 921   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
 922     CPU_I486_FLAGS, 0 },
 923   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
 924     CPU_I586_FLAGS, 0 },
 925   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
 926     CPU_I686_FLAGS, 0 },
 927   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
 928     CPU_I586_FLAGS, 0 },
 929   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
 930     CPU_PENTIUMPRO_FLAGS, 0 },
 931   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
 932     CPU_P2_FLAGS, 0 },
 933   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
 934     CPU_P3_FLAGS, 0 },
 935   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
 936     CPU_P4_FLAGS, 0 },
 937   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
 938     CPU_CORE_FLAGS, 0 },
 939   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
 940     CPU_NOCONA_FLAGS, 0 },
 941   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
 942     CPU_CORE_FLAGS, 1 },
 943   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
 944     CPU_CORE_FLAGS, 0 },
 945   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
 946     CPU_CORE2_FLAGS, 1 },
 947   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
 948     CPU_CORE2_FLAGS, 0 },
 949   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
 950     CPU_COREI7_FLAGS, 0 },
 951   { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
 952     CPU_L1OM_FLAGS, 0 },
 953   { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
 954     CPU_K1OM_FLAGS, 0 },
 955   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
 956     CPU_IAMCU_FLAGS, 0 },
 957   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
 958     CPU_K6_FLAGS, 0 },
 959   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
 960     CPU_K6_2_FLAGS, 0 },
 961   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
 962     CPU_ATHLON_FLAGS, 0 },
 963   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
 964     CPU_K8_FLAGS, 1 },
 965   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
 966     CPU_K8_FLAGS, 0 },
 967   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
 968     CPU_K8_FLAGS, 0 },
 969   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
 970     CPU_AMDFAM10_FLAGS, 0 },
 971   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
 972     CPU_BDVER1_FLAGS, 0 },
 973   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
 974     CPU_BDVER2_FLAGS, 0 },
 975   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
 976     CPU_BDVER3_FLAGS, 0 },
 977   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
 978     CPU_BDVER4_FLAGS, 0 },
 979   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
 980     CPU_ZNVER1_FLAGS, 0 },
 981   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
 982     CPU_ZNVER2_FLAGS, 0 },
 983   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
 984     CPU_BTVER1_FLAGS, 0 },
 985   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
 986     CPU_BTVER2_FLAGS, 0 },
 987   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
 988     CPU_8087_FLAGS, 0 },
 989   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
 990     CPU_287_FLAGS, 0 },
 991   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
 992     CPU_387_FLAGS, 0 },
 993   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
 994     CPU_687_FLAGS, 0 },
 995   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
 996     CPU_CMOV_FLAGS, 0 },
 997   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
 998     CPU_FXSR_FLAGS, 0 },
 999   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
1000     CPU_MMX_FLAGS, 0 },
1001   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
1002     CPU_SSE_FLAGS, 0 },
1003   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
1004     CPU_SSE2_FLAGS, 0 },
1005   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
1006     CPU_SSE3_FLAGS, 0 },
1007   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1008     CPU_SSE4A_FLAGS, 0 },
1009   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
1010     CPU_SSSE3_FLAGS, 0 },
1011   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
1012     CPU_SSE4_1_FLAGS, 0 },
1013   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
1014     CPU_SSE4_2_FLAGS, 0 },
1015   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
1016     CPU_SSE4_2_FLAGS, 0 },
1017   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
1018     CPU_AVX_FLAGS, 0 },
1019   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
1020     CPU_AVX2_FLAGS, 0 },
1021   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
1022     CPU_AVX512F_FLAGS, 0 },
1023   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1024     CPU_AVX512CD_FLAGS, 0 },
1025   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1026     CPU_AVX512ER_FLAGS, 0 },
1027   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1028     CPU_AVX512PF_FLAGS, 0 },
1029   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1030     CPU_AVX512DQ_FLAGS, 0 },
1031   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1032     CPU_AVX512BW_FLAGS, 0 },
1033   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1034     CPU_AVX512VL_FLAGS, 0 },
1035   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1036     CPU_VMX_FLAGS, 0 },
1037   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1038     CPU_VMFUNC_FLAGS, 0 },
1039   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1040     CPU_SMX_FLAGS, 0 },
1041   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1042     CPU_XSAVE_FLAGS, 0 },
1043   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1044     CPU_XSAVEOPT_FLAGS, 0 },
1045   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1046     CPU_XSAVEC_FLAGS, 0 },
1047   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1048     CPU_XSAVES_FLAGS, 0 },
1049   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1050     CPU_AES_FLAGS, 0 },
1051   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1052     CPU_PCLMUL_FLAGS, 0 },
1053   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1054     CPU_PCLMUL_FLAGS, 1 },
1055   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1056     CPU_FSGSBASE_FLAGS, 0 },
1057   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1058     CPU_RDRND_FLAGS, 0 },
1059   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1060     CPU_F16C_FLAGS, 0 },
1061   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1062     CPU_BMI2_FLAGS, 0 },
1063   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1064     CPU_FMA_FLAGS, 0 },
1065   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1066     CPU_FMA4_FLAGS, 0 },
1067   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1068     CPU_XOP_FLAGS, 0 },
1069   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1070     CPU_LWP_FLAGS, 0 },
1071   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1072     CPU_MOVBE_FLAGS, 0 },
1073   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1074     CPU_CX16_FLAGS, 0 },
1075   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1076     CPU_EPT_FLAGS, 0 },
1077   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1078     CPU_LZCNT_FLAGS, 0 },
1079   { STRING_COMMA_LEN (".popcnt"), PROCESSOR_UNKNOWN,
1080     CPU_POPCNT_FLAGS, 0 },
1081   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1082     CPU_HLE_FLAGS, 0 },
1083   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1084     CPU_RTM_FLAGS, 0 },
1085   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1086     CPU_INVPCID_FLAGS, 0 },
1087   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1088     CPU_CLFLUSH_FLAGS, 0 },
1089   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1090     CPU_NOP_FLAGS, 0 },
1091   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1092     CPU_SYSCALL_FLAGS, 0 },
1093   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1094     CPU_RDTSCP_FLAGS, 0 },
1095   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1096     CPU_3DNOW_FLAGS, 0 },
1097   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1098     CPU_3DNOWA_FLAGS, 0 },
1099   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1100     CPU_PADLOCK_FLAGS, 0 },
1101   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1102     CPU_SVME_FLAGS, 1 },
1103   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1104     CPU_SVME_FLAGS, 0 },
1105   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1106     CPU_SSE4A_FLAGS, 0 },
1107   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1108     CPU_ABM_FLAGS, 0 },
1109   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1110     CPU_BMI_FLAGS, 0 },
1111   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1112     CPU_TBM_FLAGS, 0 },
1113   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1114     CPU_ADX_FLAGS, 0 },
1115   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1116     CPU_RDSEED_FLAGS, 0 },
1117   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1118     CPU_PRFCHW_FLAGS, 0 },
1119   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1120     CPU_SMAP_FLAGS, 0 },
1121   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1122     CPU_MPX_FLAGS, 0 },
1123   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1124     CPU_SHA_FLAGS, 0 },
1125   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1126     CPU_CLFLUSHOPT_FLAGS, 0 },
1127   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1128     CPU_PREFETCHWT1_FLAGS, 0 },
1129   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1130     CPU_SE1_FLAGS, 0 },
1131   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1132     CPU_CLWB_FLAGS, 0 },
1133   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1134     CPU_AVX512IFMA_FLAGS, 0 },
1135   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1136     CPU_AVX512VBMI_FLAGS, 0 },
1137   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1138     CPU_AVX512_4FMAPS_FLAGS, 0 },
1139   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1140     CPU_AVX512_4VNNIW_FLAGS, 0 },
1141   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1142     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1143   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1144     CPU_AVX512_VBMI2_FLAGS, 0 },
1145   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1146     CPU_AVX512_VNNI_FLAGS, 0 },
1147   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1148     CPU_AVX512_BITALG_FLAGS, 0 },
1149   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1150     CPU_CLZERO_FLAGS, 0 },
1151   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1152     CPU_MWAITX_FLAGS, 0 },
1153   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1154     CPU_OSPKE_FLAGS, 0 },
1155   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1156     CPU_RDPID_FLAGS, 0 },
1157   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1158     CPU_PTWRITE_FLAGS, 0 },
1159   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1160     CPU_IBT_FLAGS, 0 },
1161   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1162     CPU_SHSTK_FLAGS, 0 },
1163   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1164     CPU_GFNI_FLAGS, 0 },
1165   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1166     CPU_VAES_FLAGS, 0 },
1167   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1168     CPU_VPCLMULQDQ_FLAGS, 0 },
1169   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1170     CPU_WBNOINVD_FLAGS, 0 },
1171   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1172     CPU_PCONFIG_FLAGS, 0 },
1173   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1174     CPU_WAITPKG_FLAGS, 0 },
1175   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1176     CPU_CLDEMOTE_FLAGS, 0 },
1177   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1178     CPU_MOVDIRI_FLAGS, 0 },
1179   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1180     CPU_MOVDIR64B_FLAGS, 0 },
1181   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1182     CPU_AVX512_BF16_FLAGS, 0 },
1183   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1184     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1185   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1186     CPU_ENQCMD_FLAGS, 0 },
1187   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1188     CPU_RDPRU_FLAGS, 0 },
1189   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1190     CPU_MCOMMIT_FLAGS, 0 },
1191   { STRING_COMMA_LEN (".sev_es"), PROCESSOR_UNKNOWN,
1192     CPU_SEV_ES_FLAGS, 0 },
1193 };
1194
1195 static const noarch_entry cpu_noarch[] =
1196 {
1197   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1198   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1199   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1200   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1201   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1202   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1203   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1204   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1205   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1206   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1207   { STRING_COMMA_LEN ("nosse4a"),  CPU_ANY_SSE4A_FLAGS },
1208   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1209   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1210   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1211   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1212   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1213   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1214   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1215   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1216   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1217   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1218   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1219   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1220   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1221   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1222   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1223   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1224   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1225   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1226   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1227   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1228   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1229   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1230   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1231   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1232   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1233   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1234   { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
1235   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1236 };
1237
1238 #ifdef I386COFF
1239 /* Like s_lcomm_internal in gas/read.c but the alignment string
1240    is allowed to be optional.  */
1241
1242 static symbolS *
1243 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1244 {
1245   addressT align = 0;
1246
1247   SKIP_WHITESPACE ();
1248
1249   if (needs_align
1250       && *input_line_pointer == ',')
1251     {
1252       align = parse_align (needs_align - 1);
1253
1254       if (align == (addressT) -1)
1255         return NULL;
1256     }
1257   else
1258     {
1259       if (size >= 8)
1260         align = 3;
1261       else if (size >= 4)
1262         align = 2;
1263       else if (size >= 2)
1264         align = 1;
1265       else
1266         align = 0;
1267     }
1268
1269   bss_alloc (symbolP, size, align);
1270   return symbolP;
1271 }
1272
1273 static void
1274 pe_lcomm (int needs_align)
1275 {
1276   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1277 }
1278 #endif
1279
/* Target specific pseudo-ops (assembler directives).  Every entry is
   { name, handler, integer argument } and the table ends with an
   all-zero entry.  The integer is presumably what the handler receives
   when the directive is seen (e.g. pe_lcomm takes a single int) -
   confirm against the pseudo_typeS definition.  */
const pseudo_typeS md_pseudo_table[] =
{
  /* ".align" counts in bytes unless the target is a.out or asks for
     power-of-two alignment via USE_ALIGN_PTWO.  */
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
  /* I386COFF targets get the PE flavoured ".lcomm" in place of ".bss".  */
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  /* Accepted and ignored.  */
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  /* Code size selection; ".code64" exists only in BFD64 builds.  */
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  /* On/off directive pairs: one handler each, argument 1 or 0.  */
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  /* Both checks share set_check; the argument tells them apart.  */
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
  /* ELF (or maybe-ELF) builds add ".largecomm"; all other builds
     register the DWARF2 line number directives here instead.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
#endif
  /* End of table marker.  */
  {0, 0, 0}
};
1326
/* For interface with expression ().  Defined outside this file;
   presumably the current position in the line being parsed
   (pe_lcomm_internal peeks at the character it points to).  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  Private to this file.  */
static struct hash_control *op_hash;

/* Hash table for register lookup.  Private to this file.  */
static struct hash_control *reg_hash;
1335 \f
  /* Various efficient no-op patterns for aligning code labels.
     Note: Don't try to assemble the instructions in the comments.
     0L and 0w are not legal.

     The number in each array name is the length of the pattern in
     bytes.  The *_patt tables further down are indexed by length
     minus one, which is how i386_output_nops looks patterns up.  */
static const unsigned char f32_1[] =
  {0x90};                               /* nop                  */
static const unsigned char f32_2[] =
  {0x66,0x90};                          /* xchg %ax,%ax         */
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
/* Jump opcodes used by i386_generate_nops to branch over long runs of
   padding.  Only the opcode bytes are stored here; the displacement
   is written by the caller.  */
static const unsigned char jump_disp8[] =
  {0xeb};                               /* jmp disp8           */
static const unsigned char jump32_disp32[] =
  {0xe9};                               /* jmp disp32          */
/* Used when assembling 16-bit code: 0x66 prefix plus the jmp opcode,
   followed by a 4-byte displacement.  */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};                          /* jmp disp32          */
/* 32-bit NOPs patterns.  There is no 5-byte pattern, hence the NULL
   slot; i386_output_nops copes with a missing entry.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  The 1 and 2 byte patterns are shared with
   the 32-bit table.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  Every length from 1 to 11 bytes
   has an entry; the two shortest are shared with the 32-bit table.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1401
/* Generate COUNT bytes of NOPs to WHERE from PATT, using single NOP
   instructions of at most MAX_SINGLE_NOP_SIZE bytes.

   PATT[N - 1] is the N byte pattern and may be NULL when no pattern of
   that length exists; the next shorter pattern is used in that case.
   As many maximum-length NOPs as fit are written first and the
   remaining bytes are covered at the end.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)
{
  const unsigned char *insn;
  int whole;
  int tail;
  int pos = 0;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  /* Step down one size if the requested one isn't available.  */
  if (patt[max_single_nop_size - 1] == NULL)
    max_single_nop_size--;
  insn = patt[max_single_nop_size - 1];

  /* Place the longer NOPs first.  */
  tail = count % max_single_nop_size;
  whole = count - tail;
  while (pos < whole)
    {
      memcpy (where + pos, insn, max_single_nop_size);
      pos += max_single_nop_size;
    }

  if (tail == 0)
    return;

  insn = patt[tail - 1];
  if (insn != NULL)
    {
      memcpy (where + pos, insn, tail);
      return;
    }

  /* No pattern of exactly TAIL bytes: emit the next shorter one and
     make up the difference with a one-byte NOP.  */
  tail--;
  memcpy (where + pos, patt[tail - 1], tail);
  where[pos + tail] = *patt[0];
}
1453
1454 static INLINE int
1455 fits_in_imm7 (offsetT num)
1456 {
1457   return (num & 0x7f) == num;
1458 }
1459
1460 static INLINE int
1461 fits_in_imm31 (offsetT num)
1462 {
1463   return (num & 0x7fffffff) == num;
1464 }
1465
1466 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1467    single NOP instruction LIMIT.  */
1468
1469 void
1470 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1471 {
1472   const unsigned char *const *patt = NULL;
1473   int max_single_nop_size;
1474   /* Maximum number of NOPs before switching to jump over NOPs.  */
1475   int max_number_of_nops;
1476
1477   switch (fragP->fr_type)
1478     {
1479     case rs_fill_nop:
1480     case rs_align_code:
1481       break;
1482     case rs_machine_dependent:
1483       /* Allow NOP padding for jumps and calls.  */
1484       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1485           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1486         break;
1487       /* Fall through.  */
1488     default:
1489       return;
1490     }
1491
1492   /* We need to decide which NOP sequence to use for 32bit and
1493      64bit. When -mtune= is used:
1494
1495      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1496      PROCESSOR_GENERIC32, f32_patt will be used.
1497      2. For the rest, alt_patt will be used.
1498
1499      When -mtune= isn't used, alt_patt will be used if
1500      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1501      be used.
1502
1503      When -march= or .arch is used, we can't use anything beyond
1504      cpu_arch_isa_flags.   */
1505
1506   if (flag_code == CODE_16BIT)
1507     {
1508       patt = f16_patt;
1509       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1510       /* Limit number of NOPs to 2 in 16-bit mode.  */
1511       max_number_of_nops = 2;
1512     }
1513   else
1514     {
1515       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1516         {
1517           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1518           switch (cpu_arch_tune)
1519             {
1520             case PROCESSOR_UNKNOWN:
1521               /* We use cpu_arch_isa_flags to check if we SHOULD
1522                  optimize with nops.  */
1523               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1524                 patt = alt_patt;
1525               else
1526                 patt = f32_patt;
1527               break;
1528             case PROCESSOR_PENTIUM4:
1529             case PROCESSOR_NOCONA:
1530             case PROCESSOR_CORE:
1531             case PROCESSOR_CORE2:
1532             case PROCESSOR_COREI7:
1533             case PROCESSOR_L1OM:
1534             case PROCESSOR_K1OM:
1535             case PROCESSOR_GENERIC64:
1536             case PROCESSOR_K6:
1537             case PROCESSOR_ATHLON:
1538             case PROCESSOR_K8:
1539             case PROCESSOR_AMDFAM10:
1540             case PROCESSOR_BD:
1541             case PROCESSOR_ZNVER:
1542             case PROCESSOR_BT:
1543               patt = alt_patt;
1544               break;
1545             case PROCESSOR_I386:
1546             case PROCESSOR_I486:
1547             case PROCESSOR_PENTIUM:
1548             case PROCESSOR_PENTIUMPRO:
1549             case PROCESSOR_IAMCU:
1550             case PROCESSOR_GENERIC32:
1551               patt = f32_patt;
1552               break;
1553             }
1554         }
1555       else
1556         {
1557           switch (fragP->tc_frag_data.tune)
1558             {
1559             case PROCESSOR_UNKNOWN:
1560               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1561                  PROCESSOR_UNKNOWN.  */
1562               abort ();
1563               break;
1564
1565             case PROCESSOR_I386:
1566             case PROCESSOR_I486:
1567             case PROCESSOR_PENTIUM:
1568             case PROCESSOR_IAMCU:
1569             case PROCESSOR_K6:
1570             case PROCESSOR_ATHLON:
1571             case PROCESSOR_K8:
1572             case PROCESSOR_AMDFAM10:
1573             case PROCESSOR_BD:
1574             case PROCESSOR_ZNVER:
1575             case PROCESSOR_BT:
1576             case PROCESSOR_GENERIC32:
1577               /* We use cpu_arch_isa_flags to check if we CAN optimize
1578                  with nops.  */
1579               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1580                 patt = alt_patt;
1581               else
1582                 patt = f32_patt;
1583               break;
1584             case PROCESSOR_PENTIUMPRO:
1585             case PROCESSOR_PENTIUM4:
1586             case PROCESSOR_NOCONA:
1587             case PROCESSOR_CORE:
1588             case PROCESSOR_CORE2:
1589             case PROCESSOR_COREI7:
1590             case PROCESSOR_L1OM:
1591             case PROCESSOR_K1OM:
1592               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1593                 patt = alt_patt;
1594               else
1595                 patt = f32_patt;
1596               break;
1597             case PROCESSOR_GENERIC64:
1598               patt = alt_patt;
1599               break;
1600             }
1601         }
1602
1603       if (patt == f32_patt)
1604         {
1605           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1606           /* Limit number of NOPs to 2 for older processors.  */
1607           max_number_of_nops = 2;
1608         }
1609       else
1610         {
1611           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1612           /* Limit number of NOPs to 7 for newer processors.  */
1613           max_number_of_nops = 7;
1614         }
1615     }
1616
1617   if (limit == 0)
1618     limit = max_single_nop_size;
1619
1620   if (fragP->fr_type == rs_fill_nop)
1621     {
1622       /* Output NOPs for .nop directive.  */
1623       if (limit > max_single_nop_size)
1624         {
1625           as_bad_where (fragP->fr_file, fragP->fr_line,
1626                         _("invalid single nop size: %d "
1627                           "(expect within [0, %d])"),
1628                         limit, max_single_nop_size);
1629           return;
1630         }
1631     }
1632   else if (fragP->fr_type != rs_machine_dependent)
1633     fragP->fr_var = count;
1634
1635   if ((count / max_single_nop_size) > max_number_of_nops)
1636     {
1637       /* Generate jump over NOPs.  */
1638       offsetT disp = count - 2;
1639       if (fits_in_imm7 (disp))
1640         {
1641           /* Use "jmp disp8" if possible.  */
1642           count = disp;
1643           where[0] = jump_disp8[0];
1644           where[1] = count;
1645           where += 2;
1646         }
1647       else
1648         {
1649           unsigned int size_of_jump;
1650
1651           if (flag_code == CODE_16BIT)
1652             {
1653               where[0] = jump16_disp32[0];
1654               where[1] = jump16_disp32[1];
1655               size_of_jump = 2;
1656             }
1657           else
1658             {
1659               where[0] = jump32_disp32[0];
1660               size_of_jump = 1;
1661             }
1662
1663           count -= size_of_jump + 4;
1664           if (!fits_in_imm31 (count))
1665             {
1666               as_bad_where (fragP->fr_file, fragP->fr_line,
1667                             _("jump over nop padding out of range"));
1668               return;
1669             }
1670
1671           md_number_to_chars (where + size_of_jump, count, 4);
1672           where += size_of_jump + 4;
1673         }
1674     }
1675
1676   /* Generate multiple NOPs.  */
1677   i386_output_nops (where, patt, count, limit);
1678 }
1679
1680 static INLINE int
1681 operand_type_all_zero (const union i386_operand_type *x)
1682 {
1683   switch (ARRAY_SIZE(x->array))
1684     {
1685     case 3:
1686       if (x->array[2])
1687         return 0;
1688       /* Fall through.  */
1689     case 2:
1690       if (x->array[1])
1691         return 0;
1692       /* Fall through.  */
1693     case 1:
1694       return !x->array[0];
1695     default:
1696       abort ();
1697     }
1698 }
1699
1700 static INLINE void
1701 operand_type_set (union i386_operand_type *x, unsigned int v)
1702 {
1703   switch (ARRAY_SIZE(x->array))
1704     {
1705     case 3:
1706       x->array[2] = v;
1707       /* Fall through.  */
1708     case 2:
1709       x->array[1] = v;
1710       /* Fall through.  */
1711     case 1:
1712       x->array[0] = v;
1713       /* Fall through.  */
1714       break;
1715     default:
1716       abort ();
1717     }
1718
1719   x->bitfield.class = ClassNone;
1720   x->bitfield.instance = InstanceNone;
1721 }
1722
1723 static INLINE int
1724 operand_type_equal (const union i386_operand_type *x,
1725                     const union i386_operand_type *y)
1726 {
1727   switch (ARRAY_SIZE(x->array))
1728     {
1729     case 3:
1730       if (x->array[2] != y->array[2])
1731         return 0;
1732       /* Fall through.  */
1733     case 2:
1734       if (x->array[1] != y->array[1])
1735         return 0;
1736       /* Fall through.  */
1737     case 1:
1738       return x->array[0] == y->array[0];
1739       break;
1740     default:
1741       abort ();
1742     }
1743 }
1744
1745 static INLINE int
1746 cpu_flags_all_zero (const union i386_cpu_flags *x)
1747 {
1748   switch (ARRAY_SIZE(x->array))
1749     {
1750     case 4:
1751       if (x->array[3])
1752         return 0;
1753       /* Fall through.  */
1754     case 3:
1755       if (x->array[2])
1756         return 0;
1757       /* Fall through.  */
1758     case 2:
1759       if (x->array[1])
1760         return 0;
1761       /* Fall through.  */
1762     case 1:
1763       return !x->array[0];
1764     default:
1765       abort ();
1766     }
1767 }
1768
1769 static INLINE int
1770 cpu_flags_equal (const union i386_cpu_flags *x,
1771                  const union i386_cpu_flags *y)
1772 {
1773   switch (ARRAY_SIZE(x->array))
1774     {
1775     case 4:
1776       if (x->array[3] != y->array[3])
1777         return 0;
1778       /* Fall through.  */
1779     case 3:
1780       if (x->array[2] != y->array[2])
1781         return 0;
1782       /* Fall through.  */
1783     case 2:
1784       if (x->array[1] != y->array[1])
1785         return 0;
1786       /* Fall through.  */
1787     case 1:
1788       return x->array[0] == y->array[0];
1789       break;
1790     default:
1791       abort ();
1792     }
1793 }
1794
1795 static INLINE int
1796 cpu_flags_check_cpu64 (i386_cpu_flags f)
1797 {
1798   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1799            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1800 }
1801
1802 static INLINE i386_cpu_flags
1803 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1804 {
1805   switch (ARRAY_SIZE (x.array))
1806     {
1807     case 4:
1808       x.array [3] &= y.array [3];
1809       /* Fall through.  */
1810     case 3:
1811       x.array [2] &= y.array [2];
1812       /* Fall through.  */
1813     case 2:
1814       x.array [1] &= y.array [1];
1815       /* Fall through.  */
1816     case 1:
1817       x.array [0] &= y.array [0];
1818       break;
1819     default:
1820       abort ();
1821     }
1822   return x;
1823 }
1824
1825 static INLINE i386_cpu_flags
1826 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1827 {
1828   switch (ARRAY_SIZE (x.array))
1829     {
1830     case 4:
1831       x.array [3] |= y.array [3];
1832       /* Fall through.  */
1833     case 3:
1834       x.array [2] |= y.array [2];
1835       /* Fall through.  */
1836     case 2:
1837       x.array [1] |= y.array [1];
1838       /* Fall through.  */
1839     case 1:
1840       x.array [0] |= y.array [0];
1841       break;
1842     default:
1843       abort ();
1844     }
1845   return x;
1846 }
1847
1848 static INLINE i386_cpu_flags
1849 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1850 {
1851   switch (ARRAY_SIZE (x.array))
1852     {
1853     case 4:
1854       x.array [3] &= ~y.array [3];
1855       /* Fall through.  */
1856     case 3:
1857       x.array [2] &= ~y.array [2];
1858       /* Fall through.  */
1859     case 2:
1860       x.array [1] &= ~y.array [1];
1861       /* Fall through.  */
1862     case 1:
1863       x.array [0] &= ~y.array [0];
1864       break;
1865     default:
1866       abort ();
1867     }
1868   return x;
1869 }
1870
/* CPU flag set initialized from CPU_ANY_AVX512F_FLAGS.  */
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

/* Bits of the value returned by cpu_flags_match:
   CPU_FLAGS_ARCH_MATCH is set when the template's CPU feature
   requirements are met by the selected architecture flags;
   CPU_FLAGS_64BIT_MATCH is set when the template's cpu64/cpuno64
   restriction agrees with the current code mode.  */
#define CPU_FLAGS_ARCH_MATCH            0x1
#define CPU_FLAGS_64BIT_MATCH           0x2

/* Both match bits set.  */
#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1878
1879 /* Return CPU flags match bits. */
1880
1881 static int
1882 cpu_flags_match (const insn_template *t)
1883 {
1884   i386_cpu_flags x = t->cpu_flags;
1885   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1886
1887   x.bitfield.cpu64 = 0;
1888   x.bitfield.cpuno64 = 0;
1889
1890   if (cpu_flags_all_zero (&x))
1891     {
1892       /* This instruction is available on all archs.  */
1893       match |= CPU_FLAGS_ARCH_MATCH;
1894     }
1895   else
1896     {
1897       /* This instruction is available only on some archs.  */
1898       i386_cpu_flags cpu = cpu_arch_flags;
1899
1900       /* AVX512VL is no standalone feature - match it and then strip it.  */
1901       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1902         return match;
1903       x.bitfield.cpuavx512vl = 0;
1904
1905       cpu = cpu_flags_and (x, cpu);
1906       if (!cpu_flags_all_zero (&cpu))
1907         {
1908           if (x.bitfield.cpuavx)
1909             {
1910               /* We need to check a few extra flags with AVX.  */
1911               if (cpu.bitfield.cpuavx
1912                   && (!t->opcode_modifier.sse2avx || sse2avx)
1913                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1914                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1915                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1916                 match |= CPU_FLAGS_ARCH_MATCH;
1917             }
1918           else if (x.bitfield.cpuavx512f)
1919             {
1920               /* We need to check a few extra flags with AVX512F.  */
1921               if (cpu.bitfield.cpuavx512f
1922                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1923                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1924                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1925                 match |= CPU_FLAGS_ARCH_MATCH;
1926             }
1927           else
1928             match |= CPU_FLAGS_ARCH_MATCH;
1929         }
1930     }
1931   return match;
1932 }
1933
1934 static INLINE i386_operand_type
1935 operand_type_and (i386_operand_type x, i386_operand_type y)
1936 {
1937   if (x.bitfield.class != y.bitfield.class)
1938     x.bitfield.class = ClassNone;
1939   if (x.bitfield.instance != y.bitfield.instance)
1940     x.bitfield.instance = InstanceNone;
1941
1942   switch (ARRAY_SIZE (x.array))
1943     {
1944     case 3:
1945       x.array [2] &= y.array [2];
1946       /* Fall through.  */
1947     case 2:
1948       x.array [1] &= y.array [1];
1949       /* Fall through.  */
1950     case 1:
1951       x.array [0] &= y.array [0];
1952       break;
1953     default:
1954       abort ();
1955     }
1956   return x;
1957 }
1958
1959 static INLINE i386_operand_type
1960 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1961 {
1962   gas_assert (y.bitfield.class == ClassNone);
1963   gas_assert (y.bitfield.instance == InstanceNone);
1964
1965   switch (ARRAY_SIZE (x.array))
1966     {
1967     case 3:
1968       x.array [2] &= ~y.array [2];
1969       /* Fall through.  */
1970     case 2:
1971       x.array [1] &= ~y.array [1];
1972       /* Fall through.  */
1973     case 1:
1974       x.array [0] &= ~y.array [0];
1975       break;
1976     default:
1977       abort ();
1978     }
1979   return x;
1980 }
1981
1982 static INLINE i386_operand_type
1983 operand_type_or (i386_operand_type x, i386_operand_type y)
1984 {
1985   gas_assert (x.bitfield.class == ClassNone ||
1986               y.bitfield.class == ClassNone ||
1987               x.bitfield.class == y.bitfield.class);
1988   gas_assert (x.bitfield.instance == InstanceNone ||
1989               y.bitfield.instance == InstanceNone ||
1990               x.bitfield.instance == y.bitfield.instance);
1991
1992   switch (ARRAY_SIZE (x.array))
1993     {
1994     case 3:
1995       x.array [2] |= y.array [2];
1996       /* Fall through.  */
1997     case 2:
1998       x.array [1] |= y.array [1];
1999       /* Fall through.  */
2000     case 1:
2001       x.array [0] |= y.array [0];
2002       break;
2003     default:
2004       abort ();
2005     }
2006   return x;
2007 }
2008
2009 static INLINE i386_operand_type
2010 operand_type_xor (i386_operand_type x, i386_operand_type y)
2011 {
2012   gas_assert (y.bitfield.class == ClassNone);
2013   gas_assert (y.bitfield.instance == InstanceNone);
2014
2015   switch (ARRAY_SIZE (x.array))
2016     {
2017     case 3:
2018       x.array [2] ^= y.array [2];
2019       /* Fall through.  */
2020     case 2:
2021       x.array [1] ^= y.array [1];
2022       /* Fall through.  */
2023     case 1:
2024       x.array [0] ^= y.array [0];
2025       break;
2026     default:
2027       abort ();
2028     }
2029   return x;
2030 }
2031
/* File-scope operand type constants, each initialized from the
   correspondingly named OPERAND_TYPE_* initializer macro.  */
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2049
/* Operand categories that operand_type_check can test an operand
   type against.  */
enum operand_type
{
  /* Operand class is Reg.  */
  reg,
  /* Any of the imm8/imm8s/imm16/imm32/imm32s/imm64 bits is set.  */
  imm,
  /* Any of the disp8/disp16/disp32/disp32s/disp64 bits is set.  */
  disp,
  /* Any displacement bit or the baseindex bit is set.  */
  anymem
};
2057
2058 static INLINE int
2059 operand_type_check (i386_operand_type t, enum operand_type c)
2060 {
2061   switch (c)
2062     {
2063     case reg:
2064       return t.bitfield.class == Reg;
2065
2066     case imm:
2067       return (t.bitfield.imm8
2068               || t.bitfield.imm8s
2069               || t.bitfield.imm16
2070               || t.bitfield.imm32
2071               || t.bitfield.imm32s
2072               || t.bitfield.imm64);
2073
2074     case disp:
2075       return (t.bitfield.disp8
2076               || t.bitfield.disp16
2077               || t.bitfield.disp32
2078               || t.bitfield.disp32s
2079               || t.bitfield.disp64);
2080
2081     case anymem:
2082       return (t.bitfield.disp8
2083               || t.bitfield.disp16
2084               || t.bitfield.disp32
2085               || t.bitfield.disp32s
2086               || t.bitfield.disp64
2087               || t.bitfield.baseindex);
2088
2089     default:
2090       abort ();
2091     }
2092
2093   return 0;
2094 }
2095
2096 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2097    between operand GIVEN and opeand WANTED for instruction template T.  */
2098
2099 static INLINE int
2100 match_operand_size (const insn_template *t, unsigned int wanted,
2101                     unsigned int given)
2102 {
2103   return !((i.types[given].bitfield.byte
2104             && !t->operand_types[wanted].bitfield.byte)
2105            || (i.types[given].bitfield.word
2106                && !t->operand_types[wanted].bitfield.word)
2107            || (i.types[given].bitfield.dword
2108                && !t->operand_types[wanted].bitfield.dword)
2109            || (i.types[given].bitfield.qword
2110                && !t->operand_types[wanted].bitfield.qword)
2111            || (i.types[given].bitfield.tbyte
2112                && !t->operand_types[wanted].bitfield.tbyte));
2113 }
2114
2115 /* Return 1 if there is no conflict in SIMD register between operand
2116    GIVEN and opeand WANTED for instruction template T.  */
2117
2118 static INLINE int
2119 match_simd_size (const insn_template *t, unsigned int wanted,
2120                  unsigned int given)
2121 {
2122   return !((i.types[given].bitfield.xmmword
2123             && !t->operand_types[wanted].bitfield.xmmword)
2124            || (i.types[given].bitfield.ymmword
2125                && !t->operand_types[wanted].bitfield.ymmword)
2126            || (i.types[given].bitfield.zmmword
2127                && !t->operand_types[wanted].bitfield.zmmword));
2128 }
2129
2130 /* Return 1 if there is no conflict in any size between operand GIVEN
2131    and opeand WANTED for instruction template T.  */
2132
2133 static INLINE int
2134 match_mem_size (const insn_template *t, unsigned int wanted,
2135                 unsigned int given)
2136 {
2137   return (match_operand_size (t, wanted, given)
2138           && !((i.types[given].bitfield.unspecified
2139                 && !i.broadcast
2140                 && !t->operand_types[wanted].bitfield.unspecified)
2141                || (i.types[given].bitfield.fword
2142                    && !t->operand_types[wanted].bitfield.fword)
2143                /* For scalar opcode templates to allow register and memory
2144                   operands at the same time, some special casing is needed
2145                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2146                   down-conversion vpmov*.  */
2147                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2148                     && !t->opcode_modifier.broadcast
2149                     && (t->operand_types[wanted].bitfield.byte
2150                         || t->operand_types[wanted].bitfield.word
2151                         || t->operand_types[wanted].bitfield.dword
2152                         || t->operand_types[wanted].bitfield.qword))
2153                    ? (i.types[given].bitfield.xmmword
2154                       || i.types[given].bitfield.ymmword
2155                       || i.types[given].bitfield.zmmword)
2156                    : !match_simd_size(t, wanted, given))));
2157 }
2158
2159 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2160    operands for instruction template T, and it has MATCH_REVERSE set if there
2161    is no size conflict on any operands for the template with operands reversed
2162    (and the template allows for reversing in the first place).  */
2163
2164 #define MATCH_STRAIGHT 1
2165 #define MATCH_REVERSE  2
2166
2167 static INLINE unsigned int
2168 operand_size_match (const insn_template *t)
2169 {
2170   unsigned int j, match = MATCH_STRAIGHT;
2171
2172   /* Don't check non-absolute jump instructions.  */
2173   if (t->opcode_modifier.jump
2174       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2175     return match;
2176
2177   /* Check memory and accumulator operand size.  */
2178   for (j = 0; j < i.operands; j++)
2179     {
2180       if (i.types[j].bitfield.class != Reg
2181           && i.types[j].bitfield.class != RegSIMD
2182           && t->opcode_modifier.anysize)
2183         continue;
2184
2185       if (t->operand_types[j].bitfield.class == Reg
2186           && !match_operand_size (t, j, j))
2187         {
2188           match = 0;
2189           break;
2190         }
2191
2192       if (t->operand_types[j].bitfield.class == RegSIMD
2193           && !match_simd_size (t, j, j))
2194         {
2195           match = 0;
2196           break;
2197         }
2198
2199       if (t->operand_types[j].bitfield.instance == Accum
2200           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2201         {
2202           match = 0;
2203           break;
2204         }
2205
2206       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2207         {
2208           match = 0;
2209           break;
2210         }
2211     }
2212
2213   if (!t->opcode_modifier.d)
2214     {
2215     mismatch:
2216       if (!match)
2217         i.error = operand_size_mismatch;
2218       return match;
2219     }
2220
2221   /* Check reverse.  */
2222   gas_assert (i.operands >= 2 && i.operands <= 3);
2223
2224   for (j = 0; j < i.operands; j++)
2225     {
2226       unsigned int given = i.operands - j - 1;
2227
2228       if (t->operand_types[j].bitfield.class == Reg
2229           && !match_operand_size (t, j, given))
2230         goto mismatch;
2231
2232       if (t->operand_types[j].bitfield.class == RegSIMD
2233           && !match_simd_size (t, j, given))
2234         goto mismatch;
2235
2236       if (t->operand_types[j].bitfield.instance == Accum
2237           && (!match_operand_size (t, j, given)
2238               || !match_simd_size (t, j, given)))
2239         goto mismatch;
2240
2241       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2242         goto mismatch;
2243     }
2244
2245   return match | MATCH_REVERSE;
2246 }
2247
2248 static INLINE int
2249 operand_type_match (i386_operand_type overlap,
2250                     i386_operand_type given)
2251 {
2252   i386_operand_type temp = overlap;
2253
2254   temp.bitfield.unspecified = 0;
2255   temp.bitfield.byte = 0;
2256   temp.bitfield.word = 0;
2257   temp.bitfield.dword = 0;
2258   temp.bitfield.fword = 0;
2259   temp.bitfield.qword = 0;
2260   temp.bitfield.tbyte = 0;
2261   temp.bitfield.xmmword = 0;
2262   temp.bitfield.ymmword = 0;
2263   temp.bitfield.zmmword = 0;
2264   if (operand_type_all_zero (&temp))
2265     goto mismatch;
2266
2267   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2268     return 1;
2269
2270  mismatch:
2271   i.error = operand_type_mismatch;
2272   return 0;
2273 }
2274
2275 /* If given types g0 and g1 are registers they must be of the same type
2276    unless the expected operand type register overlap is null.
2277    Some Intel syntax memory operand size checking also happens here.  */
2278
2279 static INLINE int
2280 operand_type_register_match (i386_operand_type g0,
2281                              i386_operand_type t0,
2282                              i386_operand_type g1,
2283                              i386_operand_type t1)
2284 {
2285   if (g0.bitfield.class != Reg
2286       && g0.bitfield.class != RegSIMD
2287       && (!operand_type_check (g0, anymem)
2288           || g0.bitfield.unspecified
2289           || (t0.bitfield.class != Reg
2290               && t0.bitfield.class != RegSIMD)))
2291     return 1;
2292
2293   if (g1.bitfield.class != Reg
2294       && g1.bitfield.class != RegSIMD
2295       && (!operand_type_check (g1, anymem)
2296           || g1.bitfield.unspecified
2297           || (t1.bitfield.class != Reg
2298               && t1.bitfield.class != RegSIMD)))
2299     return 1;
2300
2301   if (g0.bitfield.byte == g1.bitfield.byte
2302       && g0.bitfield.word == g1.bitfield.word
2303       && g0.bitfield.dword == g1.bitfield.dword
2304       && g0.bitfield.qword == g1.bitfield.qword
2305       && g0.bitfield.xmmword == g1.bitfield.xmmword
2306       && g0.bitfield.ymmword == g1.bitfield.ymmword
2307       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2308     return 1;
2309
2310   if (!(t0.bitfield.byte & t1.bitfield.byte)
2311       && !(t0.bitfield.word & t1.bitfield.word)
2312       && !(t0.bitfield.dword & t1.bitfield.dword)
2313       && !(t0.bitfield.qword & t1.bitfield.qword)
2314       && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2315       && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2316       && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2317     return 1;
2318
2319   i.error = register_type_mismatch;
2320
2321   return 0;
2322 }
2323
2324 static INLINE unsigned int
2325 register_number (const reg_entry *r)
2326 {
2327   unsigned int nr = r->reg_num;
2328
2329   if (r->reg_flags & RegRex)
2330     nr += 8;
2331
2332   if (r->reg_flags & RegVRex)
2333     nr += 16;
2334
2335   return nr;
2336 }
2337
2338 static INLINE unsigned int
2339 mode_from_disp_size (i386_operand_type t)
2340 {
2341   if (t.bitfield.disp8)
2342     return 1;
2343   else if (t.bitfield.disp16
2344            || t.bitfield.disp32
2345            || t.bitfield.disp32s)
2346     return 2;
2347   else
2348     return 0;
2349 }
2350
2351 static INLINE int
2352 fits_in_signed_byte (addressT num)
2353 {
2354   return num + 0x80 <= 0xff;
2355 }
2356
2357 static INLINE int
2358 fits_in_unsigned_byte (addressT num)
2359 {
2360   return num <= 0xff;
2361 }
2362
2363 static INLINE int
2364 fits_in_unsigned_word (addressT num)
2365 {
2366   return num <= 0xffff;
2367 }
2368
2369 static INLINE int
2370 fits_in_signed_word (addressT num)
2371 {
2372   return num + 0x8000 <= 0xffff;
2373 }
2374
2375 static INLINE int
2376 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2377 {
2378 #ifndef BFD64
2379   return 1;
2380 #else
2381   return num + 0x80000000 <= 0xffffffff;
2382 #endif
2383 }                               /* fits_in_signed_long() */
2384
2385 static INLINE int
2386 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2387 {
2388 #ifndef BFD64
2389   return 1;
2390 #else
2391   return num <= 0xffffffff;
2392 #endif
2393 }                               /* fits_in_unsigned_long() */
2394
2395 static INLINE int
2396 fits_in_disp8 (offsetT num)
2397 {
2398   int shift = i.memshift;
2399   unsigned int mask;
2400
2401   if (shift == -1)
2402     abort ();
2403
2404   mask = (1 << shift) - 1;
2405
2406   /* Return 0 if NUM isn't properly aligned.  */
2407   if ((num & mask))
2408     return 0;
2409
2410   /* Check if NUM will fit in 8bit after shift.  */
2411   return fits_in_signed_byte (num >> shift);
2412 }
2413
2414 static INLINE int
2415 fits_in_imm4 (offsetT num)
2416 {
2417   return (num & 0xf) == num;
2418 }
2419
2420 static i386_operand_type
2421 smallest_imm_type (offsetT num)
2422 {
2423   i386_operand_type t;
2424
2425   operand_type_set (&t, 0);
2426   t.bitfield.imm64 = 1;
2427
2428   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2429     {
2430       /* This code is disabled on the 486 because all the Imm1 forms
2431          in the opcode table are slower on the i486.  They're the
2432          versions with the implicitly specified single-position
2433          displacement, which has another syntax if you really want to
2434          use that form.  */
2435       t.bitfield.imm1 = 1;
2436       t.bitfield.imm8 = 1;
2437       t.bitfield.imm8s = 1;
2438       t.bitfield.imm16 = 1;
2439       t.bitfield.imm32 = 1;
2440       t.bitfield.imm32s = 1;
2441     }
2442   else if (fits_in_signed_byte (num))
2443     {
2444       t.bitfield.imm8 = 1;
2445       t.bitfield.imm8s = 1;
2446       t.bitfield.imm16 = 1;
2447       t.bitfield.imm32 = 1;
2448       t.bitfield.imm32s = 1;
2449     }
2450   else if (fits_in_unsigned_byte (num))
2451     {
2452       t.bitfield.imm8 = 1;
2453       t.bitfield.imm16 = 1;
2454       t.bitfield.imm32 = 1;
2455       t.bitfield.imm32s = 1;
2456     }
2457   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2458     {
2459       t.bitfield.imm16 = 1;
2460       t.bitfield.imm32 = 1;
2461       t.bitfield.imm32s = 1;
2462     }
2463   else if (fits_in_signed_long (num))
2464     {
2465       t.bitfield.imm32 = 1;
2466       t.bitfield.imm32s = 1;
2467     }
2468   else if (fits_in_unsigned_long (num))
2469     t.bitfield.imm32 = 1;
2470
2471   return t;
2472 }
2473
2474 static offsetT
2475 offset_in_range (offsetT val, int size)
2476 {
2477   addressT mask;
2478
2479   switch (size)
2480     {
2481     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2482     case 2: mask = ((addressT) 1 << 16) - 1; break;
2483     case 4: mask = ((addressT) 2 << 31) - 1; break;
2484 #ifdef BFD64
2485     case 8: mask = ((addressT) 2 << 63) - 1; break;
2486 #endif
2487     default: abort ();
2488     }
2489
2490 #ifdef BFD64
2491   /* If BFD64, sign extend val for 32bit address mode.  */
2492   if (flag_code != CODE_64BIT
2493       || i.prefix[ADDR_PREFIX])
2494     if ((val & ~(((addressT) 2 << 31) - 1)) == 0)
2495       val = (val ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2496 #endif
2497
2498   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2499     {
2500       char buf1[40], buf2[40];
2501
2502       sprint_value (buf1, val);
2503       sprint_value (buf2, val & mask);
2504       as_warn (_("%s shortened to %s"), buf1, buf2);
2505     }
2506   return val & mask;
2507 }
2508
/* Result codes of add_prefix.  PREFIX_EXIST must stay zero: add_prefix
   tests its result for being non-zero to decide whether the prefix was
   added.  */
enum PREFIX_GROUP
{
  /* A prefix from the same class already exists.  */
  PREFIX_EXIST = 0,
  /* A lock prefix was added.  */
  PREFIX_LOCK,
  /* A rep/repne prefix was added.  */
  PREFIX_REP,
  /* A ds prefix was added.  */
  PREFIX_DS,
  /* Some other prefix was added.  */
  PREFIX_OTHER
};
2517
2518 /* Returns
2519    a. PREFIX_EXIST if attempting to add a prefix where one from the
2520    same class already exists.
2521    b. PREFIX_LOCK if lock prefix is added.
2522    c. PREFIX_REP if rep/repne prefix is added.
2523    d. PREFIX_DS if ds prefix is added.
2524    e. PREFIX_OTHER if other prefix is added.
2525  */
2526
2527 static enum PREFIX_GROUP
2528 add_prefix (unsigned int prefix)
2529 {
2530   enum PREFIX_GROUP ret = PREFIX_OTHER;
2531   unsigned int q;
2532
2533   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2534       && flag_code == CODE_64BIT)
2535     {
2536       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2537           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2538           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2539           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2540         ret = PREFIX_EXIST;
2541       q = REX_PREFIX;
2542     }
2543   else
2544     {
2545       switch (prefix)
2546         {
2547         default:
2548           abort ();
2549
2550         case DS_PREFIX_OPCODE:
2551           ret = PREFIX_DS;
2552           /* Fall through.  */
2553         case CS_PREFIX_OPCODE:
2554         case ES_PREFIX_OPCODE:
2555         case FS_PREFIX_OPCODE:
2556         case GS_PREFIX_OPCODE:
2557         case SS_PREFIX_OPCODE:
2558           q = SEG_PREFIX;
2559           break;
2560
2561         case REPNE_PREFIX_OPCODE:
2562         case REPE_PREFIX_OPCODE:
2563           q = REP_PREFIX;
2564           ret = PREFIX_REP;
2565           break;
2566
2567         case LOCK_PREFIX_OPCODE:
2568           q = LOCK_PREFIX;
2569           ret = PREFIX_LOCK;
2570           break;
2571
2572         case FWAIT_OPCODE:
2573           q = WAIT_PREFIX;
2574           break;
2575
2576         case ADDR_PREFIX_OPCODE:
2577           q = ADDR_PREFIX;
2578           break;
2579
2580         case DATA_PREFIX_OPCODE:
2581           q = DATA_PREFIX;
2582           break;
2583         }
2584       if (i.prefix[q] != 0)
2585         ret = PREFIX_EXIST;
2586     }
2587
2588   if (ret)
2589     {
2590       if (!i.prefix[q])
2591         ++i.prefixes;
2592       i.prefix[q] |= prefix;
2593     }
2594   else
2595     as_bad (_("same type of prefix used twice"));
2596
2597   return ret;
2598 }
2599
2600 static void
2601 update_code_flag (int value, int check)
2602 {
2603   PRINTF_LIKE ((*as_error));
2604
2605   flag_code = (enum flag_code) value;
2606   if (flag_code == CODE_64BIT)
2607     {
2608       cpu_arch_flags.bitfield.cpu64 = 1;
2609       cpu_arch_flags.bitfield.cpuno64 = 0;
2610     }
2611   else
2612     {
2613       cpu_arch_flags.bitfield.cpu64 = 0;
2614       cpu_arch_flags.bitfield.cpuno64 = 1;
2615     }
2616   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2617     {
2618       if (check)
2619         as_error = as_fatal;
2620       else
2621         as_error = as_bad;
2622       (*as_error) (_("64bit mode not supported on `%s'."),
2623                    cpu_arch_name ? cpu_arch_name : default_arch);
2624     }
2625   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2626     {
2627       if (check)
2628         as_error = as_fatal;
2629       else
2630         as_error = as_bad;
2631       (*as_error) (_("32bit mode not supported on `%s'."),
2632                    cpu_arch_name ? cpu_arch_name : default_arch);
2633     }
2634   stackop_size = '\0';
2635 }
2636
/* Switch the code mode to VALUE via update_code_flag.  CHECK is passed
   as zero, so an unsupported mode is reported with as_bad rather than
   as_fatal.  */

static void
set_code_flag (int value)
{
  const int fatal_on_error = 0;

  update_code_flag (value, fatal_on_error);
}
2642
2643 static void
2644 set_16bit_gcc_code_flag (int new_code_flag)
2645 {
2646   flag_code = (enum flag_code) new_code_flag;
2647   if (flag_code != CODE_16BIT)
2648     abort ();
2649   cpu_arch_flags.bitfield.cpu64 = 0;
2650   cpu_arch_flags.bitfield.cpuno64 = 1;
2651   stackop_size = LONG_MNEM_SUFFIX;
2652 }
2653
2654 static void
2655 set_intel_syntax (int syntax_flag)
2656 {
2657   /* Find out if register prefixing is specified.  */
2658   int ask_naked_reg = 0;
2659
2660   SKIP_WHITESPACE ();
2661   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2662     {
2663       char *string;
2664       int e = get_symbol_name (&string);
2665
2666       if (strcmp (string, "prefix") == 0)
2667         ask_naked_reg = 1;
2668       else if (strcmp (string, "noprefix") == 0)
2669         ask_naked_reg = -1;
2670       else
2671         as_bad (_("bad argument to syntax directive."));
2672       (void) restore_line_pointer (e);
2673     }
2674   demand_empty_rest_of_line ();
2675
2676   intel_syntax = syntax_flag;
2677
2678   if (ask_naked_reg == 0)
2679     allow_naked_reg = (intel_syntax
2680                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2681   else
2682     allow_naked_reg = (ask_naked_reg < 0);
2683
2684   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2685
2686   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2687   identifier_chars['$'] = intel_syntax ? '$' : 0;
2688   register_prefix = allow_naked_reg ? "" : "%";
2689 }
2690
/* Set intel_mnemonic to MNEMONIC_FLAG.  */

static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2696
/* Set allow_index_reg to FLAG.  */

static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2702
2703 static void
2704 set_check (int what)
2705 {
2706   enum check_kind *kind;
2707   const char *str;
2708
2709   if (what)
2710     {
2711       kind = &operand_check;
2712       str = "operand";
2713     }
2714   else
2715     {
2716       kind = &sse_check;
2717       str = "sse";
2718     }
2719
2720   SKIP_WHITESPACE ();
2721
2722   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2723     {
2724       char *string;
2725       int e = get_symbol_name (&string);
2726
2727       if (strcmp (string, "none") == 0)
2728         *kind = check_none;
2729       else if (strcmp (string, "warning") == 0)
2730         *kind = check_warning;
2731       else if (strcmp (string, "error") == 0)
2732         *kind = check_error;
2733       else
2734         as_bad (_("bad argument to %s_check directive."), str);
2735       (void) restore_line_pointer (e);
2736     }
2737   else
2738     as_bad (_("missing argument for %s_check directive"), str);
2739
2740   demand_empty_rest_of_line ();
2741 }
2742
2743 static void
2744 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2745                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2746 {
2747 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2748   static const char *arch;
2749
2750   /* Intel LIOM is only supported on ELF.  */
2751   if (!IS_ELF)
2752     return;
2753
2754   if (!arch)
2755     {
2756       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2757          use default_arch.  */
2758       arch = cpu_arch_name;
2759       if (!arch)
2760         arch = default_arch;
2761     }
2762
2763   /* If we are targeting Intel MCU, we must enable it.  */
2764   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
2765       || new_flag.bitfield.cpuiamcu)
2766     return;
2767
2768   /* If we are targeting Intel L1OM, we must enable it.  */
2769   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
2770       || new_flag.bitfield.cpul1om)
2771     return;
2772
2773   /* If we are targeting Intel K1OM, we must enable it.  */
2774   if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
2775       || new_flag.bitfield.cpuk1om)
2776     return;
2777
2778   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2779 #endif
2780 }
2781
2782 static void
2783 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2784 {
2785   SKIP_WHITESPACE ();
2786
2787   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2788     {
2789       char *string;
2790       int e = get_symbol_name (&string);
2791       unsigned int j;
2792       i386_cpu_flags flags;
2793
2794       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2795         {
2796           if (strcmp (string, cpu_arch[j].name) == 0)
2797             {
2798               check_cpu_arch_compatible (string, cpu_arch[j].flags);
2799
2800               if (*string != '.')
2801                 {
2802                   cpu_arch_name = cpu_arch[j].name;
2803                   cpu_sub_arch_name = NULL;
2804                   cpu_arch_flags = cpu_arch[j].flags;
2805                   if (flag_code == CODE_64BIT)
2806                     {
2807                       cpu_arch_flags.bitfield.cpu64 = 1;
2808                       cpu_arch_flags.bitfield.cpuno64 = 0;
2809                     }
2810                   else
2811                     {
2812                       cpu_arch_flags.bitfield.cpu64 = 0;
2813                       cpu_arch_flags.bitfield.cpuno64 = 1;
2814                     }
2815                   cpu_arch_isa = cpu_arch[j].type;
2816                   cpu_arch_isa_flags = cpu_arch[j].flags;
2817                   if (!cpu_arch_tune_set)
2818                     {
2819                       cpu_arch_tune = cpu_arch_isa;
2820                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2821                     }
2822                   break;
2823                 }
2824
2825               flags = cpu_flags_or (cpu_arch_flags,
2826                                     cpu_arch[j].flags);
2827
2828               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2829                 {
2830                   if (cpu_sub_arch_name)
2831                     {
2832                       char *name = cpu_sub_arch_name;
2833                       cpu_sub_arch_name = concat (name,
2834                                                   cpu_arch[j].name,
2835                                                   (const char *) NULL);
2836                       free (name);
2837                     }
2838                   else
2839                     cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
2840                   cpu_arch_flags = flags;
2841                   cpu_arch_isa_flags = flags;
2842                 }
2843               else
2844                 cpu_arch_isa_flags
2845                   = cpu_flags_or (cpu_arch_isa_flags,
2846                                   cpu_arch[j].flags);
2847               (void) restore_line_pointer (e);
2848               demand_empty_rest_of_line ();
2849               return;
2850             }
2851         }
2852
2853       if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
2854         {
2855           /* Disable an ISA extension.  */
2856           for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
2857             if (strcmp (string + 1, cpu_noarch [j].name) == 0)
2858               {
2859                 flags = cpu_flags_and_not (cpu_arch_flags,
2860                                            cpu_noarch[j].flags);
2861                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2862                   {
2863                     if (cpu_sub_arch_name)
2864                       {
2865                         char *name = cpu_sub_arch_name;
2866                         cpu_sub_arch_name = concat (name, string,
2867                                                     (const char *) NULL);
2868                         free (name);
2869                       }
2870                     else
2871                       cpu_sub_arch_name = xstrdup (string);
2872                     cpu_arch_flags = flags;
2873                     cpu_arch_isa_flags = flags;
2874                   }
2875                 (void) restore_line_pointer (e);
2876                 demand_empty_rest_of_line ();
2877                 return;
2878               }
2879
2880           j = ARRAY_SIZE (cpu_arch);
2881         }
2882
2883       if (j >= ARRAY_SIZE (cpu_arch))
2884         as_bad (_("no such architecture: `%s'"), string);
2885
2886       *input_line_pointer = e;
2887     }
2888   else
2889     as_bad (_("missing cpu architecture"));
2890
2891   no_cond_jump_promotion = 0;
2892   if (*input_line_pointer == ','
2893       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2894     {
2895       char *string;
2896       char e;
2897
2898       ++input_line_pointer;
2899       e = get_symbol_name (&string);
2900
2901       if (strcmp (string, "nojumps") == 0)
2902         no_cond_jump_promotion = 1;
2903       else if (strcmp (string, "jumps") == 0)
2904         ;
2905       else
2906         as_bad (_("no such architecture modifier: `%s'"), string);
2907
2908       (void) restore_line_pointer (e);
2909     }
2910
2911   demand_empty_rest_of_line ();
2912 }
2913
2914 enum bfd_architecture
2915 i386_arch (void)
2916 {
2917   if (cpu_arch_isa == PROCESSOR_L1OM)
2918     {
2919       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2920           || flag_code != CODE_64BIT)
2921         as_fatal (_("Intel L1OM is 64bit ELF only"));
2922       return bfd_arch_l1om;
2923     }
2924   else if (cpu_arch_isa == PROCESSOR_K1OM)
2925     {
2926       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2927           || flag_code != CODE_64BIT)
2928         as_fatal (_("Intel K1OM is 64bit ELF only"));
2929       return bfd_arch_k1om;
2930     }
2931   else if (cpu_arch_isa == PROCESSOR_IAMCU)
2932     {
2933       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2934           || flag_code == CODE_64BIT)
2935         as_fatal (_("Intel MCU is 32bit ELF only"));
2936       return bfd_arch_iamcu;
2937     }
2938   else
2939     return bfd_arch_i386;
2940 }
2941
2942 unsigned long
2943 i386_mach (void)
2944 {
2945   if (!strncmp (default_arch, "x86_64", 6))
2946     {
2947       if (cpu_arch_isa == PROCESSOR_L1OM)
2948         {
2949           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2950               || default_arch[6] != '\0')
2951             as_fatal (_("Intel L1OM is 64bit ELF only"));
2952           return bfd_mach_l1om;
2953         }
2954       else if (cpu_arch_isa == PROCESSOR_K1OM)
2955         {
2956           if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2957               || default_arch[6] != '\0')
2958             as_fatal (_("Intel K1OM is 64bit ELF only"));
2959           return bfd_mach_k1om;
2960         }
2961       else if (default_arch[6] == '\0')
2962         return bfd_mach_x86_64;
2963       else
2964         return bfd_mach_x64_32;
2965     }
2966   else if (!strcmp (default_arch, "i386")
2967            || !strcmp (default_arch, "iamcu"))
2968     {
2969       if (cpu_arch_isa == PROCESSOR_IAMCU)
2970         {
2971           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2972             as_fatal (_("Intel MCU is 32bit ELF only"));
2973           return bfd_mach_i386_iamcu;
2974         }
2975       else
2976         return bfd_mach_i386_i386;
2977     }
2978   else
2979     as_fatal (_("unknown architecture"));
2980 }
2981 \f
2982 void
2983 md_begin (void)
2984 {
2985   const char *hash_err;
2986
2987   /* Support pseudo prefixes like {disp32}.  */
2988   lex_type ['{'] = LEX_BEGIN_NAME;
2989
2990   /* Initialize op_hash hash table.  */
2991   op_hash = hash_new ();
2992
2993   {
2994     const insn_template *optab;
2995     templates *core_optab;
2996
2997     /* Setup for loop.  */
2998     optab = i386_optab;
2999     core_optab = XNEW (templates);
3000     core_optab->start = optab;
3001
3002     while (1)
3003       {
3004         ++optab;
3005         if (optab->name == NULL
3006             || strcmp (optab->name, (optab - 1)->name) != 0)
3007           {
3008             /* different name --> ship out current template list;
3009                add to hash table; & begin anew.  */
3010             core_optab->end = optab;
3011             hash_err = hash_insert (op_hash,
3012                                     (optab - 1)->name,
3013                                     (void *) core_optab);
3014             if (hash_err)
3015               {
3016                 as_fatal (_("can't hash %s: %s"),
3017                           (optab - 1)->name,
3018                           hash_err);
3019               }
3020             if (optab->name == NULL)
3021               break;
3022             core_optab = XNEW (templates);
3023             core_optab->start = optab;
3024           }
3025       }
3026   }
3027
3028   /* Initialize reg_hash hash table.  */
3029   reg_hash = hash_new ();
3030   {
3031     const reg_entry *regtab;
3032     unsigned int regtab_size = i386_regtab_size;
3033
3034     for (regtab = i386_regtab; regtab_size--; regtab++)
3035       {
3036         hash_err = hash_insert (reg_hash, regtab->reg_name, (void *) regtab);
3037         if (hash_err)
3038           as_fatal (_("can't hash %s: %s"),
3039                     regtab->reg_name,
3040                     hash_err);
3041       }
3042   }
3043
3044   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3045   {
3046     int c;
3047     char *p;
3048
3049     for (c = 0; c < 256; c++)
3050       {
3051         if (ISDIGIT (c))
3052           {
3053             digit_chars[c] = c;
3054             mnemonic_chars[c] = c;
3055             register_chars[c] = c;
3056             operand_chars[c] = c;
3057           }
3058         else if (ISLOWER (c))
3059           {
3060             mnemonic_chars[c] = c;
3061             register_chars[c] = c;
3062             operand_chars[c] = c;
3063           }
3064         else if (ISUPPER (c))
3065           {
3066             mnemonic_chars[c] = TOLOWER (c);
3067             register_chars[c] = mnemonic_chars[c];
3068             operand_chars[c] = c;
3069           }
3070         else if (c == '{' || c == '}')
3071           {
3072             mnemonic_chars[c] = c;
3073             operand_chars[c] = c;
3074           }
3075
3076         if (ISALPHA (c) || ISDIGIT (c))
3077           identifier_chars[c] = c;
3078         else if (c >= 128)
3079           {
3080             identifier_chars[c] = c;
3081             operand_chars[c] = c;
3082           }
3083       }
3084
3085 #ifdef LEX_AT
3086     identifier_chars['@'] = '@';
3087 #endif
3088 #ifdef LEX_QM
3089     identifier_chars['?'] = '?';
3090     operand_chars['?'] = '?';
3091 #endif
3092     digit_chars['-'] = '-';
3093     mnemonic_chars['_'] = '_';
3094     mnemonic_chars['-'] = '-';
3095     mnemonic_chars['.'] = '.';
3096     identifier_chars['_'] = '_';
3097     identifier_chars['.'] = '.';
3098
3099     for (p = operand_special_chars; *p != '\0'; p++)
3100       operand_chars[(unsigned char) *p] = *p;
3101   }
3102
3103   if (flag_code == CODE_64BIT)
3104     {
3105 #if defined (OBJ_COFF) && defined (TE_PE)
3106       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3107                                   ? 32 : 16);
3108 #else
3109       x86_dwarf2_return_column = 16;
3110 #endif
3111       x86_cie_data_alignment = -8;
3112     }
3113   else
3114     {
3115       x86_dwarf2_return_column = 8;
3116       x86_cie_data_alignment = -4;
3117     }
3118
3119   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3120      can be turned into BRANCH_PREFIX frag.  */
3121   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3122     abort ();
3123 }
3124
/* Print statistics for the opcode hash table (op_hash) and the register
   hash table (reg_hash) to FILE, labelled "i386 opcode" and
   "i386 register" respectively.  */

void
i386_print_statistics (FILE *file)
{
  hash_print_statistics (file, "i386 opcode", op_hash);
  hash_print_statistics (file, "i386 register", reg_hash);
}
3131 \f
3132 #ifdef DEBUG386
3133
3134 /* Debugging routines for md_assemble.  */
3135 static void pte (insn_template *);
3136 static void pt (i386_operand_type);
3137 static void pe (expressionS *);
3138 static void ps (symbolS *);
3139
3140 static void
3141 pi (const char *line, i386_insn *x)
3142 {
3143   unsigned int j;
3144
3145   fprintf (stdout, "%s: template ", line);
3146   pte (&x->tm);
3147   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3148            x->base_reg ? x->base_reg->reg_name : "none",
3149            x->index_reg ? x->index_reg->reg_name : "none",
3150            x->log2_scale_factor);
3151   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3152            x->rm.mode, x->rm.reg, x->rm.regmem);
3153   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3154            x->sib.base, x->sib.index, x->sib.scale);
3155   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3156            (x->rex & REX_W) != 0,
3157            (x->rex & REX_R) != 0,
3158            (x->rex & REX_X) != 0,
3159            (x->rex & REX_B) != 0);
3160   for (j = 0; j < x->operands; j++)
3161     {
3162       fprintf (stdout, "    #%d:  ", j + 1);
3163       pt (x->types[j]);
3164       fprintf (stdout, "\n");
3165       if (x->types[j].bitfield.class == Reg
3166           || x->types[j].bitfield.class == RegMMX
3167           || x->types[j].bitfield.class == RegSIMD
3168           || x->types[j].bitfield.class == SReg
3169           || x->types[j].bitfield.class == RegCR
3170           || x->types[j].bitfield.class == RegDR
3171           || x->types[j].bitfield.class == RegTR)
3172         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3173       if (operand_type_check (x->types[j], imm))
3174         pe (x->op[j].imms);
3175       if (operand_type_check (x->types[j], disp))
3176         pe (x->op[j].disps);
3177     }
3178 }
3179
3180 static void
3181 pte (insn_template *t)
3182 {
3183   unsigned int j;
3184   fprintf (stdout, " %d operands ", t->operands);
3185   fprintf (stdout, "opcode %x ", t->base_opcode);
3186   if (t->extension_opcode != None)
3187     fprintf (stdout, "ext %x ", t->extension_opcode);
3188   if (t->opcode_modifier.d)
3189     fprintf (stdout, "D");
3190   if (t->opcode_modifier.w)
3191     fprintf (stdout, "W");
3192   fprintf (stdout, "\n");
3193   for (j = 0; j < t->operands; j++)
3194     {
3195       fprintf (stdout, "    #%d type ", j + 1);
3196       pt (t->operand_types[j]);
3197       fprintf (stdout, "\n");
3198     }
3199 }
3200
3201 static void
3202 pe (expressionS *e)
3203 {
3204   fprintf (stdout, "    operation     %d\n", e->X_op);
3205   fprintf (stdout, "    add_number    %ld (%lx)\n",
3206            (long) e->X_add_number, (long) e->X_add_number);
3207   if (e->X_add_symbol)
3208     {
3209       fprintf (stdout, "    add_symbol    ");
3210       ps (e->X_add_symbol);
3211       fprintf (stdout, "\n");
3212     }
3213   if (e->X_op_symbol)
3214     {
3215       fprintf (stdout, "    op_symbol    ");
3216       ps (e->X_op_symbol);
3217       fprintf (stdout, "\n");
3218     }
3219 }
3220
3221 static void
3222 ps (symbolS *s)
3223 {
3224   fprintf (stdout, "%s type %s%s",
3225            S_GET_NAME (s),
3226            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3227            segment_name (S_GET_SEGMENT (s)));
3228 }
3229
3230 static struct type_name
3231   {
3232     i386_operand_type mask;
3233     const char *name;
3234   }
3235 const type_names[] =
3236 {
3237   { OPERAND_TYPE_REG8, "r8" },
3238   { OPERAND_TYPE_REG16, "r16" },
3239   { OPERAND_TYPE_REG32, "r32" },
3240   { OPERAND_TYPE_REG64, "r64" },
3241   { OPERAND_TYPE_ACC8, "acc8" },
3242   { OPERAND_TYPE_ACC16, "acc16" },
3243   { OPERAND_TYPE_ACC32, "acc32" },
3244   { OPERAND_TYPE_ACC64, "acc64" },
3245   { OPERAND_TYPE_IMM8, "i8" },
3246   { OPERAND_TYPE_IMM8, "i8s" },
3247   { OPERAND_TYPE_IMM16, "i16" },
3248   { OPERAND_TYPE_IMM32, "i32" },
3249   { OPERAND_TYPE_IMM32S, "i32s" },
3250   { OPERAND_TYPE_IMM64, "i64" },
3251   { OPERAND_TYPE_IMM1, "i1" },
3252   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3253   { OPERAND_TYPE_DISP8, "d8" },
3254   { OPERAND_TYPE_DISP16, "d16" },
3255   { OPERAND_TYPE_DISP32, "d32" },
3256   { OPERAND_TYPE_DISP32S, "d32s" },
3257   { OPERAND_TYPE_DISP64, "d64" },
3258   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3259   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3260   { OPERAND_TYPE_CONTROL, "control reg" },
3261   { OPERAND_TYPE_TEST, "test reg" },
3262   { OPERAND_TYPE_DEBUG, "debug reg" },
3263   { OPERAND_TYPE_FLOATREG, "FReg" },
3264   { OPERAND_TYPE_FLOATACC, "FAcc" },
3265   { OPERAND_TYPE_SREG, "SReg" },
3266   { OPERAND_TYPE_REGMMX, "rMMX" },
3267   { OPERAND_TYPE_REGXMM, "rXMM" },
3268   { OPERAND_TYPE_REGYMM, "rYMM" },
3269   { OPERAND_TYPE_REGZMM, "rZMM" },
3270   { OPERAND_TYPE_REGMASK, "Mask reg" },
3271 };
3272
3273 static void
3274 pt (i386_operand_type t)
3275 {
3276   unsigned int j;
3277   i386_operand_type a;
3278
3279   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3280     {
3281       a = operand_type_and (t, type_names[j].mask);
3282       if (operand_type_equal (&a, &type_names[j].mask))
3283         fprintf (stdout, "%s, ",  type_names[j].name);
3284     }
3285   fflush (stdout);
3286 }
3287
3288 #endif /* DEBUG386 */
3289 \f
3290 static bfd_reloc_code_real_type
3291 reloc (unsigned int size,
3292        int pcrel,
3293        int sign,
3294        bfd_reloc_code_real_type other)
3295 {
3296   if (other != NO_RELOC)
3297     {
3298       reloc_howto_type *rel;
3299
3300       if (size == 8)
3301         switch (other)
3302           {
3303           case BFD_RELOC_X86_64_GOT32:
3304             return BFD_RELOC_X86_64_GOT64;
3305             break;
3306           case BFD_RELOC_X86_64_GOTPLT64:
3307             return BFD_RELOC_X86_64_GOTPLT64;
3308             break;
3309           case BFD_RELOC_X86_64_PLTOFF64:
3310             return BFD_RELOC_X86_64_PLTOFF64;
3311             break;
3312           case BFD_RELOC_X86_64_GOTPC32:
3313             other = BFD_RELOC_X86_64_GOTPC64;
3314             break;
3315           case BFD_RELOC_X86_64_GOTPCREL:
3316             other = BFD_RELOC_X86_64_GOTPCREL64;
3317             break;
3318           case BFD_RELOC_X86_64_TPOFF32:
3319             other = BFD_RELOC_X86_64_TPOFF64;
3320             break;
3321           case BFD_RELOC_X86_64_DTPOFF32:
3322             other = BFD_RELOC_X86_64_DTPOFF64;
3323             break;
3324           default:
3325             break;
3326           }
3327
3328 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3329       if (other == BFD_RELOC_SIZE32)
3330         {
3331           if (size == 8)
3332             other = BFD_RELOC_SIZE64;
3333           if (pcrel)
3334             {
3335               as_bad (_("there are no pc-relative size relocations"));
3336               return NO_RELOC;
3337             }
3338         }
3339 #endif
3340
3341       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3342       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3343         sign = -1;
3344
3345       rel = bfd_reloc_type_lookup (stdoutput, other);
3346       if (!rel)
3347         as_bad (_("unknown relocation (%u)"), other);
3348       else if (size != bfd_get_reloc_size (rel))
3349         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3350                 bfd_get_reloc_size (rel),
3351                 size);
3352       else if (pcrel && !rel->pc_relative)
3353         as_bad (_("non-pc-relative relocation for pc-relative field"));
3354       else if ((rel->complain_on_overflow == complain_overflow_signed
3355                 && !sign)
3356                || (rel->complain_on_overflow == complain_overflow_unsigned
3357                    && sign > 0))
3358         as_bad (_("relocated field and relocation type differ in signedness"));
3359       else
3360         return other;
3361       return NO_RELOC;
3362     }
3363
3364   if (pcrel)
3365     {
3366       if (!sign)
3367         as_bad (_("there are no unsigned pc-relative relocations"));
3368       switch (size)
3369         {
3370         case 1: return BFD_RELOC_8_PCREL;
3371         case 2: return BFD_RELOC_16_PCREL;
3372         case 4: return BFD_RELOC_32_PCREL;
3373         case 8: return BFD_RELOC_64_PCREL;
3374         }
3375       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3376     }
3377   else
3378     {
3379       if (sign > 0)
3380         switch (size)
3381           {
3382           case 4: return BFD_RELOC_X86_64_32S;
3383           }
3384       else
3385         switch (size)
3386           {
3387           case 1: return BFD_RELOC_8;
3388           case 2: return BFD_RELOC_16;
3389           case 4: return BFD_RELOC_32;
3390           case 8: return BFD_RELOC_64;
3391           }
3392       as_bad (_("cannot do %s %u byte relocation"),
3393               sign > 0 ? "signed" : "unsigned", size);
3394     }
3395
3396   return NO_RELOC;
3397 }
3398
3399 /* Here we decide which fixups can be adjusted to make them relative to
3400    the beginning of the section instead of the symbol.  Basically we need
3401    to make sure that the dynamic relocations are done correctly, so in
3402    some cases we force the original symbol to be used.  */
3403
3404 int
3405 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3406 {
3407 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3408   if (!IS_ELF)
3409     return 1;
3410
3411   /* Don't adjust pc-relative references to merge sections in 64-bit
3412      mode.  */
3413   if (use_rela_relocations
3414       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3415       && fixP->fx_pcrel)
3416     return 0;
3417
3418   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3419      and changed later by validate_fix.  */
3420   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3421       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3422     return 0;
3423
3424   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3425      for size relocations.  */
3426   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3427       || fixP->fx_r_type == BFD_RELOC_SIZE64
3428       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3429       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3430       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3431       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3432       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3433       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3434       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3435       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3436       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3437       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3438       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3439       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3440       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3441       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3442       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3443       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3444       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3445       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3446       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3447       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3448       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3449       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3450       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3451       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3452       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3453       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3454       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3455       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3456       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3457     return 0;
3458 #endif
3459   return 1;
3460 }
3461
/* Classify MNEMONIC by its spelling:
     0  non-math (does not start with 'f', or fxsave/fxrstor),
     1  any other 'f' mnemonic,
     2  integer op ("fi..."),
     3  control/state ops: fldcw/fldenv, non-waiting "fn..." forms other
        than fnop, frstor/frstpm, fsave, fstcw/fstdw/fstenv/fsts[gw].

   Note that the value returned is meaningful only for opcodes with
   (memory) operands, hence the code here is free to improperly handle
   opcodes that have no operands (for better performance and smaller
   code).  Mnemonics such as fclex, fdecstp, fdisi, femms, feni,
   fincstp, finit, fsetpm and the fs segment override prefix are
   therefore not special-cased.  */

static int
intel_float_operand (const char *mnemonic)
{
  const char *tail;
  char second;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  second = mnemonic[1];
  /* Only dereferenced once SECOND is known not to be the terminator.  */
  tail = mnemonic + 2;

  if (second == 'i')
    return 2; /* integer op */

  if (second == 'l')
    {
      /* fldcw/fldenv */
      if (tail[0] == 'd' && (tail[1] == 'c' || tail[1] == 'e'))
        return 3;
    }
  else if (second == 'n')
    {
      /* non-waiting control op; fnop is the exception */
      if (tail[0] != 'o')
        return 3;
    }
  else if (second == 'r')
    {
      /* frstor/frstpm */
      if (tail[0] == 's')
        return 3;
    }
  else if (second == 's')
    {
      /* fsave */
      if (tail[0] == 'a')
        return 3;
      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (tail[0] == 't'
          && (tail[1] == 'c' || tail[1] == 'd'
              || tail[1] == 'e' || tail[1] == 's'))
        return 3;
    }
  else if (second == 'x')
    {
      /* fxsave/fxrstor are not really math ops */
      if (tail[0] == 'r' || tail[0] == 's')
        return 0;
    }

  return 1;
}
3514
3515 /* Build the VEX prefix.  */
3516
3517 static void
3518 build_vex_prefix (const insn_template *t)
3519 {
3520   unsigned int register_specifier;
3521   unsigned int implied_prefix;
3522   unsigned int vector_length;
3523   unsigned int w;
3524
3525   /* Check register specifier.  */
3526   if (i.vex.register_specifier)
3527     {
3528       register_specifier =
3529         ~register_number (i.vex.register_specifier) & 0xf;
3530       gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3531     }
3532   else
3533     register_specifier = 0xf;
3534
3535   /* Use 2-byte VEX prefix by swapping destination and source operand
3536      if there are more than 1 register operand.  */
3537   if (i.reg_operands > 1
3538       && i.vec_encoding != vex_encoding_vex3
3539       && i.dir_encoding == dir_encoding_default
3540       && i.operands == i.reg_operands
3541       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3542       && i.tm.opcode_modifier.vexopcode == VEX0F
3543       && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3544       && i.rex == REX_B)
3545     {
3546       unsigned int xchg = i.operands - 1;
3547       union i386_op temp_op;
3548       i386_operand_type temp_type;
3549
3550       temp_type = i.types[xchg];
3551       i.types[xchg] = i.types[0];
3552       i.types[0] = temp_type;
3553       temp_op = i.op[xchg];
3554       i.op[xchg] = i.op[0];
3555       i.op[0] = temp_op;
3556
3557       gas_assert (i.rm.mode == 3);
3558
3559       i.rex = REX_R;
3560       xchg = i.rm.regmem;
3561       i.rm.regmem = i.rm.reg;
3562       i.rm.reg = xchg;
3563
3564       if (i.tm.opcode_modifier.d)
3565         i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3566                             ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
3567       else /* Use the next insn.  */
3568         i.tm = t[1];
3569     }
3570
3571   /* Use 2-byte VEX prefix by swapping commutative source operands if there
3572      are no memory operands and at least 3 register ones.  */
3573   if (i.reg_operands >= 3
3574       && i.vec_encoding != vex_encoding_vex3
3575       && i.reg_operands == i.operands - i.imm_operands
3576       && i.tm.opcode_modifier.vex
3577       && i.tm.opcode_modifier.commutative
3578       && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3579       && i.rex == REX_B
3580       && i.vex.register_specifier
3581       && !(i.vex.register_specifier->reg_flags & RegRex))
3582     {
3583       unsigned int xchg = i.operands - i.reg_operands;
3584       union i386_op temp_op;
3585       i386_operand_type temp_type;
3586
3587       gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
3588       gas_assert (!i.tm.opcode_modifier.sae);
3589       gas_assert (operand_type_equal (&i.types[i.operands - 2],
3590                                       &i.types[i.operands - 3]));
3591       gas_assert (i.rm.mode == 3);
3592
3593       temp_type = i.types[xchg];
3594       i.types[xchg] = i.types[xchg + 1];
3595       i.types[xchg + 1] = temp_type;
3596       temp_op = i.op[xchg];
3597       i.op[xchg] = i.op[xchg + 1];
3598       i.op[xchg + 1] = temp_op;
3599
3600       i.rex = 0;
3601       xchg = i.rm.regmem | 8;
3602       i.rm.regmem = ~register_specifier & 0xf;
3603       gas_assert (!(i.rm.regmem & 8));
3604       i.vex.register_specifier += xchg - i.rm.regmem;
3605       register_specifier = ~xchg & 0xf;
3606     }
3607
3608   if (i.tm.opcode_modifier.vex == VEXScalar)
3609     vector_length = avxscalar;
3610   else if (i.tm.opcode_modifier.vex == VEX256)
3611     vector_length = 1;
3612   else
3613     {
3614       unsigned int op;
3615
3616       /* Determine vector length from the last multi-length vector
3617          operand.  */
3618       vector_length = 0;
3619       for (op = t->operands; op--;)
3620         if (t->operand_types[op].bitfield.xmmword
3621             && t->operand_types[op].bitfield.ymmword
3622             && i.types[op].bitfield.ymmword)
3623           {
3624             vector_length = 1;
3625             break;
3626           }
3627     }
3628
3629   switch ((i.tm.base_opcode >> 8) & 0xff)
3630     {
3631     case 0:
3632       implied_prefix = 0;
3633       break;
3634     case DATA_PREFIX_OPCODE:
3635       implied_prefix = 1;
3636       break;
3637     case REPE_PREFIX_OPCODE:
3638       implied_prefix = 2;
3639       break;
3640     case REPNE_PREFIX_OPCODE:
3641       implied_prefix = 3;
3642       break;
3643     default:
3644       abort ();
3645     }
3646
3647   /* Check the REX.W bit and VEXW.  */
3648   if (i.tm.opcode_modifier.vexw == VEXWIG)
3649     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3650   else if (i.tm.opcode_modifier.vexw)
3651     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3652   else
3653     w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3654
3655   /* Use 2-byte VEX prefix if possible.  */
3656   if (w == 0
3657       && i.vec_encoding != vex_encoding_vex3
3658       && i.tm.opcode_modifier.vexopcode == VEX0F
3659       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3660     {
3661       /* 2-byte VEX prefix.  */
3662       unsigned int r;
3663
3664       i.vex.length = 2;
3665       i.vex.bytes[0] = 0xc5;
3666
3667       /* Check the REX.R bit.  */
3668       r = (i.rex & REX_R) ? 0 : 1;
3669       i.vex.bytes[1] = (r << 7
3670                         | register_specifier << 3
3671                         | vector_length << 2
3672                         | implied_prefix);
3673     }
3674   else
3675     {
3676       /* 3-byte VEX prefix.  */
3677       unsigned int m;
3678
3679       i.vex.length = 3;
3680
3681       switch (i.tm.opcode_modifier.vexopcode)
3682         {
3683         case VEX0F:
3684           m = 0x1;
3685           i.vex.bytes[0] = 0xc4;
3686           break;
3687         case VEX0F38:
3688           m = 0x2;
3689           i.vex.bytes[0] = 0xc4;
3690           break;
3691         case VEX0F3A:
3692           m = 0x3;
3693           i.vex.bytes[0] = 0xc4;
3694           break;
3695         case XOP08:
3696           m = 0x8;
3697           i.vex.bytes[0] = 0x8f;
3698           break;
3699         case XOP09:
3700           m = 0x9;
3701           i.vex.bytes[0] = 0x8f;
3702           break;
3703         case XOP0A:
3704           m = 0xa;
3705           i.vex.bytes[0] = 0x8f;
3706           break;
3707         default:
3708           abort ();
3709         }
3710
3711       /* The high 3 bits of the second VEX byte are 1's compliment
3712          of RXB bits from REX.  */
3713       i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
3714
3715       i.vex.bytes[2] = (w << 7
3716                         | register_specifier << 3
3717                         | vector_length << 2
3718                         | implied_prefix);
3719     }
3720 }
3721
3722 static INLINE bfd_boolean
3723 is_evex_encoding (const insn_template *t)
3724 {
3725   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3726          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3727          || t->opcode_modifier.sae;
3728 }
3729
3730 static INLINE bfd_boolean
3731 is_any_vex_encoding (const insn_template *t)
3732 {
3733   return t->opcode_modifier.vex || t->opcode_modifier.vexopcode
3734          || is_evex_encoding (t);
3735 }
3736
3737 /* Build the EVEX prefix.  */
3738
3739 static void
3740 build_evex_prefix (void)
3741 {
3742   unsigned int register_specifier;
3743   unsigned int implied_prefix;
3744   unsigned int m, w;
3745   rex_byte vrex_used = 0;
3746
3747   /* Check register specifier.  */
3748   if (i.vex.register_specifier)
3749     {
3750       gas_assert ((i.vrex & REX_X) == 0);
3751
3752       register_specifier = i.vex.register_specifier->reg_num;
3753       if ((i.vex.register_specifier->reg_flags & RegRex))
3754         register_specifier += 8;
3755       /* The upper 16 registers are encoded in the fourth byte of the
3756          EVEX prefix.  */
3757       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3758         i.vex.bytes[3] = 0x8;
3759       register_specifier = ~register_specifier & 0xf;
3760     }
3761   else
3762     {
3763       register_specifier = 0xf;
3764
3765       /* Encode upper 16 vector index register in the fourth byte of
3766          the EVEX prefix.  */
3767       if (!(i.vrex & REX_X))
3768         i.vex.bytes[3] = 0x8;
3769       else
3770         vrex_used |= REX_X;
3771     }
3772
3773   switch ((i.tm.base_opcode >> 8) & 0xff)
3774     {
3775     case 0:
3776       implied_prefix = 0;
3777       break;
3778     case DATA_PREFIX_OPCODE:
3779       implied_prefix = 1;
3780       break;
3781     case REPE_PREFIX_OPCODE:
3782       implied_prefix = 2;
3783       break;
3784     case REPNE_PREFIX_OPCODE:
3785       implied_prefix = 3;
3786       break;
3787     default:
3788       abort ();
3789     }
3790
3791   /* 4 byte EVEX prefix.  */
3792   i.vex.length = 4;
3793   i.vex.bytes[0] = 0x62;
3794
3795   /* mmmm bits.  */
3796   switch (i.tm.opcode_modifier.vexopcode)
3797     {
3798     case VEX0F:
3799       m = 1;
3800       break;
3801     case VEX0F38:
3802       m = 2;
3803       break;
3804     case VEX0F3A:
3805       m = 3;
3806       break;
3807     default:
3808       abort ();
3809       break;
3810     }
3811
3812   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
3813      bits from REX.  */
3814   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;
3815
3816   /* The fifth bit of the second EVEX byte is 1's compliment of the
3817      REX_R bit in VREX.  */
3818   if (!(i.vrex & REX_R))
3819     i.vex.bytes[1] |= 0x10;
3820   else
3821     vrex_used |= REX_R;
3822
3823   if ((i.reg_operands + i.imm_operands) == i.operands)
3824     {
3825       /* When all operands are registers, the REX_X bit in REX is not
3826          used.  We reuse it to encode the upper 16 registers, which is
3827          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3828          as 1's compliment.  */
3829       if ((i.vrex & REX_B))
3830         {
3831           vrex_used |= REX_B;
3832           i.vex.bytes[1] &= ~0x40;
3833         }
3834     }
3835
3836   /* EVEX instructions shouldn't need the REX prefix.  */
3837   i.vrex &= ~vrex_used;
3838   gas_assert (i.vrex == 0);
3839
3840   /* Check the REX.W bit and VEXW.  */
3841   if (i.tm.opcode_modifier.vexw == VEXWIG)
3842     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3843   else if (i.tm.opcode_modifier.vexw)
3844     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3845   else
3846     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3847
3848   /* Encode the U bit.  */
3849   implied_prefix |= 0x4;
3850
3851   /* The third byte of the EVEX prefix.  */
3852   i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);
3853
3854   /* The fourth byte of the EVEX prefix.  */
3855   /* The zeroing-masking bit.  */
3856   if (i.mask && i.mask->zeroing)
3857     i.vex.bytes[3] |= 0x80;
3858
3859   /* Don't always set the broadcast bit if there is no RC.  */
3860   if (!i.rounding)
3861     {
3862       /* Encode the vector length.  */
3863       unsigned int vec_length;
3864
3865       if (!i.tm.opcode_modifier.evex
3866           || i.tm.opcode_modifier.evex == EVEXDYN)
3867         {
3868           unsigned int op;
3869
3870           /* Determine vector length from the last multi-length vector
3871              operand.  */
3872           vec_length = 0;
3873           for (op = i.operands; op--;)
3874             if (i.tm.operand_types[op].bitfield.xmmword
3875                 + i.tm.operand_types[op].bitfield.ymmword
3876                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3877               {
3878                 if (i.types[op].bitfield.zmmword)
3879                   {
3880                     i.tm.opcode_modifier.evex = EVEX512;
3881                     break;
3882                   }
3883                 else if (i.types[op].bitfield.ymmword)
3884                   {
3885                     i.tm.opcode_modifier.evex = EVEX256;
3886                     break;
3887                   }
3888                 else if (i.types[op].bitfield.xmmword)
3889                   {
3890                     i.tm.opcode_modifier.evex = EVEX128;
3891                     break;
3892                   }
3893                 else if (i.broadcast && (int) op == i.broadcast->operand)
3894                   {
3895                     switch (i.broadcast->bytes)
3896                       {
3897                         case 64:
3898                           i.tm.opcode_modifier.evex = EVEX512;
3899                           break;
3900                         case 32:
3901                           i.tm.opcode_modifier.evex = EVEX256;
3902                           break;
3903                         case 16:
3904                           i.tm.opcode_modifier.evex = EVEX128;
3905                           break;
3906                         default:
3907                           abort ();
3908                       }
3909                     break;
3910                   }
3911               }
3912
3913           if (op >= MAX_OPERANDS)
3914             abort ();
3915         }
3916
3917       switch (i.tm.opcode_modifier.evex)
3918         {
3919         case EVEXLIG: /* LL' is ignored */
3920           vec_length = evexlig << 5;
3921           break;
3922         case EVEX128:
3923           vec_length = 0 << 5;
3924           break;
3925         case EVEX256:
3926           vec_length = 1 << 5;
3927           break;
3928         case EVEX512:
3929           vec_length = 2 << 5;
3930           break;
3931         default:
3932           abort ();
3933           break;
3934         }
3935       i.vex.bytes[3] |= vec_length;
3936       /* Encode the broadcast bit.  */
3937       if (i.broadcast)
3938         i.vex.bytes[3] |= 0x10;
3939     }
3940   else
3941     {
3942       if (i.rounding->type != saeonly)
3943         i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
3944       else
3945         i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3946     }
3947
3948   if (i.mask && i.mask->mask)
3949     i.vex.bytes[3] |= i.mask->mask->reg_num;
3950 }
3951
3952 static void
3953 process_immext (void)
3954 {
3955   expressionS *exp;
3956
3957   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3958      which is coded in the same place as an 8-bit immediate field
3959      would be.  Here we fake an 8-bit immediate operand from the
3960      opcode suffix stored in tm.extension_opcode.
3961
3962      AVX instructions also use this encoding, for some of
3963      3 argument instructions.  */
3964
3965   gas_assert (i.imm_operands <= 1
3966               && (i.operands <= 2
3967                   || (is_any_vex_encoding (&i.tm)
3968                       && i.operands <= 4)));
3969
3970   exp = &im_expressions[i.imm_operands++];
3971   i.op[i.operands].imms = exp;
3972   i.types[i.operands] = imm8;
3973   i.operands++;
3974   exp->X_op = O_constant;
3975   exp->X_add_number = i.tm.extension_opcode;
3976   i.tm.extension_opcode = None;
3977 }
3978
3979
3980 static int
3981 check_hle (void)
3982 {
3983   switch (i.tm.opcode_modifier.hleprefixok)
3984     {
3985     default:
3986       abort ();
3987     case HLEPrefixNone:
3988       as_bad (_("invalid instruction `%s' after `%s'"),
3989               i.tm.name, i.hle_prefix);
3990       return 0;
3991     case HLEPrefixLock:
3992       if (i.prefix[LOCK_PREFIX])
3993         return 1;
3994       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
3995       return 0;
3996     case HLEPrefixAny:
3997       return 1;
3998     case HLEPrefixRelease:
3999       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4000         {
4001           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4002                   i.tm.name);
4003           return 0;
4004         }
4005       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4006         {
4007           as_bad (_("memory destination needed for instruction `%s'"
4008                     " after `xrelease'"), i.tm.name);
4009           return 0;
4010         }
4011       return 1;
4012     }
4013 }
4014
4015 /* Try the shortest encoding by shortening operand size.

     Operates on the current instruction I and its matched template
     i.tm, rewriting them in place when the instruction fits one of the
     patterns listed in the "Optimize:" comments below; otherwise
     nothing is changed.  The patterns are tried in order and at most
     one of them is applied:
       1. test with a 7-bit immediate on a wider register (only when
          optimize_for_space is set);
       2. 64-bit and/test/xor/sub/mov forms that can use a 32-bit
          operand size (64-bit mode only);
       3. and/or of a register with itself -> test (optimize > 1 and
          not optimizing for space);
       4. three-register VEX/EVEX operations whose first two operands
          are the same register -> narrower vector length or VEX;
       5. EVEX vector moves and logical operations that can be encoded
          with VEX.
     md_assemble only calls this when optimization is enabled, the
     instruction did not disable it, and the template is marked as
     optimizable.  */
4016
4017 static void
4018 optimize_encoding (void)
4019 {
4020   unsigned int j;
4021
4022   if (optimize_for_space
4023       && !is_any_vex_encoding (&i.tm)
4024       && i.reg_operands == 1
4025       && i.imm_operands == 1
4026       && !i.types[1].bitfield.byte
4027       && i.op[0].imms->X_op == O_constant
4028       && fits_in_imm7 (i.op[0].imms->X_add_number)
4029       && (i.tm.base_opcode == 0xa8
4030           || (i.tm.base_opcode == 0xf6
4031               && i.tm.extension_opcode == 0x0)))
4032     {
4033       /* Optimize: -Os:
4034            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4035        */
4036       unsigned int base_regnum = i.op[1].regs->reg_num;
           /* Outside 64-bit mode only register numbers 0-3 are
              converted.  */
4037       if (flag_code == CODE_64BIT || base_regnum < 4)
4038         {
4039           i.types[1].bitfield.byte = 1;
4040           /* Ignore the suffix.  */
4041           i.suffix = 0;
4042           /* Convert to byte registers.  */
4043           if (i.types[1].bitfield.word)
4044             j = 16;
4045           else if (i.types[1].bitfield.dword)
4046             j = 32;
4047           else
4048             j = 48;
4049           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4050             j += 8;
               /* NOTE(review): this steps the register pointer back by J
                  entries, which presumably relies on the byte registers
                  sitting at fixed distances before the word, dword and
                  qword registers in the register table -- confirm
                  against the table layout.  */
4051           i.op[1].regs -= j;
4052         }
4053     }
4054   else if (flag_code == CODE_64BIT
4055            && !is_any_vex_encoding (&i.tm)
4056            && ((i.types[1].bitfield.qword
4057                 && i.reg_operands == 1
4058                 && i.imm_operands == 1
4059                 && i.op[0].imms->X_op == O_constant
4060                 && ((i.tm.base_opcode == 0xb8
4061                      && i.tm.extension_opcode == None
4062                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4063                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4064                         && ((i.tm.base_opcode == 0x24
4065                              || i.tm.base_opcode == 0xa8)
4066                             || (i.tm.base_opcode == 0x80
4067                                 && i.tm.extension_opcode == 0x4)
4068                             || ((i.tm.base_opcode == 0xf6
4069                                  || (i.tm.base_opcode | 1) == 0xc7)
4070                                 && i.tm.extension_opcode == 0x0)))
4071                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4072                         && i.tm.base_opcode == 0x83
4073                         && i.tm.extension_opcode == 0x4)))
4074                || (i.types[0].bitfield.qword
4075                    && ((i.reg_operands == 2
4076                         && i.op[0].regs == i.op[1].regs
4077                         && (i.tm.base_opcode == 0x30
4078                             || i.tm.base_opcode == 0x28))
4079                        || (i.reg_operands == 1
4080                            && i.operands == 1
4081                            && i.tm.base_opcode == 0x30)))))
4082     {
4083       /* Optimize: -O:
4084            andq $imm31, %r64   -> andl $imm31, %r32
4085            andq $imm7, %r64    -> andl $imm7, %r32
4086            testq $imm31, %r64  -> testl $imm31, %r32
4087            xorq %r64, %r64     -> xorl %r32, %r32
4088            subq %r64, %r64     -> subl %r32, %r32
4089            movq $imm31, %r64   -> movl $imm31, %r32
4090            movq $imm32, %r64   -> movl $imm32, %r32
4091         */
4092       i.tm.opcode_modifier.norex64 = 1;
4093       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4094         {
4095           /* Handle
4096                movq $imm31, %r64   -> movl $imm31, %r32
4097                movq $imm32, %r64   -> movl $imm32, %r32
4098            */
4099           i.tm.operand_types[0].bitfield.imm32 = 1;
4100           i.tm.operand_types[0].bitfield.imm32s = 0;
4101           i.tm.operand_types[0].bitfield.imm64 = 0;
4102           i.types[0].bitfield.imm32 = 1;
4103           i.types[0].bitfield.imm32s = 0;
4104           i.types[0].bitfield.imm64 = 0;
4105           i.types[1].bitfield.dword = 1;
4106           i.types[1].bitfield.qword = 0;
4107           if ((i.tm.base_opcode | 1) == 0xc7)
4108             {
4109               /* Handle
4110                    movq $imm31, %r64   -> movl $imm31, %r32
4111                */
                   /* Switch the template to the 0xb8 form, which has no
                      extension opcode, W bit or ModRM byte.  */
4112               i.tm.base_opcode = 0xb8;
4113               i.tm.extension_opcode = None;
4114               i.tm.opcode_modifier.w = 0;
4115               i.tm.opcode_modifier.modrm = 0;
4116             }
4117         }
4118     }
4119   else if (optimize > 1
4120            && !optimize_for_space
4121            && !is_any_vex_encoding (&i.tm)
4122            && i.reg_operands == 2
4123            && i.op[0].regs == i.op[1].regs
4124            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4125                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4126            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4127     {
4128       /* Optimize: -O2:
4129            andb %rN, %rN  -> testb %rN, %rN
4130            andw %rN, %rN  -> testw %rN, %rN
4131            andq %rN, %rN  -> testq %rN, %rN
4132            orb %rN, %rN   -> testb %rN, %rN
4133            orw %rN, %rN   -> testw %rN, %rN
4134            orq %rN, %rN   -> testq %rN, %rN
4135
4136            and outside of 64-bit mode
4137
4138            andl %rN, %rN  -> testl %rN, %rN
4139            orl %rN, %rN   -> testl %rN, %rN
4140        */
           /* Keep only the low bit of the original opcode and combine
              it with 0x84.  */
4141       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4142     }
4143   else if (i.reg_operands == 3
4144            && i.op[0].regs == i.op[1].regs
4145            && !i.types[2].bitfield.xmmword
4146            && (i.tm.opcode_modifier.vex
4147                || ((!i.mask || i.mask->zeroing)
4148                    && !i.rounding
4149                    && is_evex_encoding (&i.tm)
4150                    && (i.vec_encoding != vex_encoding_evex
4151                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4152                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4153                        || (i.tm.operand_types[2].bitfield.zmmword
4154                            && i.types[2].bitfield.ymmword))))
4155            && ((i.tm.base_opcode == 0x55
4156                 || i.tm.base_opcode == 0x6655
4157                 || i.tm.base_opcode == 0x66df
4158                 || i.tm.base_opcode == 0x57
4159                 || i.tm.base_opcode == 0x6657
4160                 || i.tm.base_opcode == 0x66ef
4161                 || i.tm.base_opcode == 0x66f8
4162                 || i.tm.base_opcode == 0x66f9
4163                 || i.tm.base_opcode == 0x66fa
4164                 || i.tm.base_opcode == 0x66fb
4165                 || i.tm.base_opcode == 0x42
4166                 || i.tm.base_opcode == 0x6642
4167                 || i.tm.base_opcode == 0x47
4168                 || i.tm.base_opcode == 0x6647)
4169                && i.tm.extension_opcode == None))
4170     {
4171       /* Optimize: -O1:
4172            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4173            vpsubq and vpsubw:
4174              EVEX VOP %zmmM, %zmmM, %zmmN
4175                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4176                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4177              EVEX VOP %ymmM, %ymmM, %ymmN
4178                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4179                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4180              VEX VOP %ymmM, %ymmM, %ymmN
4181                -> VEX VOP %xmmM, %xmmM, %xmmN
4182            VOP, one of vpandn and vpxor:
4183              VEX VOP %ymmM, %ymmM, %ymmN
4184                -> VEX VOP %xmmM, %xmmM, %xmmN
4185            VOP, one of vpandnd and vpandnq:
4186              EVEX VOP %zmmM, %zmmM, %zmmN
4187                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4188                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4189              EVEX VOP %ymmM, %ymmM, %ymmN
4190                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4191                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4192            VOP, one of vpxord and vpxorq:
4193              EVEX VOP %zmmM, %zmmM, %zmmN
4194                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4195                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4196              EVEX VOP %ymmM, %ymmM, %ymmN
4197                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4198                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4199            VOP, one of kxord and kxorq:
4200              VEX VOP %kM, %kM, %kN
4201                -> VEX kxorw %kM, %kM, %kN
4202            VOP, one of kandnd and kandnq:
4203              VEX VOP %kM, %kM, %kN
4204                -> VEX kandnw %kM, %kM, %kN
4205        */
4206       if (is_evex_encoding (&i.tm))
4207         {
4208           if (i.vec_encoding != vex_encoding_evex)
4209             {
4210               i.tm.opcode_modifier.vex = VEX128;
4211               i.tm.opcode_modifier.vexw = VEXW0;
4212               i.tm.opcode_modifier.evex = 0;
4213             }
4214           else if (optimize > 1)
4215             i.tm.opcode_modifier.evex = EVEX128;
4216           else
                 /* EVEX encoding was explicitly requested and optimize
                    is not above 1: leave the instruction alone.  */
4217             return;
4218         }
4219       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4220         {
               /* Mask register operands: keep only the low opcode byte
                  (dropping any prefix byte above it) and use W0.  */
4221           i.tm.base_opcode &= 0xff;
4222           i.tm.opcode_modifier.vexw = VEXW0;
4223         }
4224       else
4225         i.tm.opcode_modifier.vex = VEX128;
4226
           /* When the result is VEX encoded, mark all three operands as
              xmm rather than ymm.  */
4227       if (i.tm.opcode_modifier.vex)
4228         for (j = 0; j < 3; j++)
4229           {
4230             i.types[j].bitfield.xmmword = 1;
4231             i.types[j].bitfield.ymmword = 0;
4232           }
4233     }
4234   else if (i.vec_encoding != vex_encoding_evex
4235            && !i.types[0].bitfield.zmmword
4236            && !i.types[1].bitfield.zmmword
4237            && !i.mask
4238            && !i.broadcast
4239            && is_evex_encoding (&i.tm)
4240            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
4241                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
4242                || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
4243                || (i.tm.base_opcode & ~4) == 0x66db
4244                || (i.tm.base_opcode & ~4) == 0x66eb)
4245            && i.tm.extension_opcode == None)
4246     {
4247       /* Optimize: -O1:
4248            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4249            vmovdqu32 and vmovdqu64:
4250              EVEX VOP %xmmM, %xmmN
4251                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4252              EVEX VOP %ymmM, %ymmN
4253                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4254              EVEX VOP %xmmM, mem
4255                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4256              EVEX VOP %ymmM, mem
4257                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4258              EVEX VOP mem, %xmmN
4259                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4260              EVEX VOP mem, %ymmN
4261                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4262            VOP, one of vpand, vpandn, vpor, vpxor:
4263              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4264                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4265              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4266                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4267              EVEX VOP{d,q} mem, %xmmM, %xmmN
4268                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4269              EVEX VOP{d,q} mem, %ymmM, %ymmN
4270                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4271        */
           /* Look for the first operand with a constant displacement;
              J is left at its index, or at i.operands if there is
              none.  */
4272       for (j = 0; j < i.operands; j++)
4273         if (operand_type_check (i.types[j], disp)
4274             && i.op[j].disps->X_op == O_constant)
4275           {
4276             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4277                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4278                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4279             int evex_disp8, vex_disp8;
4280             unsigned int memshift = i.memshift;
4281             offsetT n = i.op[j].disps->X_add_number;
4282
                 /* fits_in_disp8 is asked twice, once with the current
                    i.memshift and once with it cleared (presumably it
                    consults i.memshift -- confirm).  If the answers
                    differ, restore i.memshift and give up on the
                    conversion.  */
4283             evex_disp8 = fits_in_disp8 (n);
4284             i.memshift = 0;
4285             vex_disp8 = fits_in_disp8 (n);
4286             if (evex_disp8 != vex_disp8)
4287               {
4288                 i.memshift = memshift;
4289                 return;
4290               }
4291
4292             i.types[j].bitfield.disp8 = vex_disp8;
4293             break;
4294           }
           /* Turn the 0xf26f based opcode into the 0xf36f based one,
              leaving the Opcode_SIMD_IntD bit as it was.  */
4295       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
4296         i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
4297       i.tm.opcode_modifier.vex
4298         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4299       i.tm.opcode_modifier.vexw = VEXW0;
4300       /* VPAND, VPOR, and VPXOR are commutative.  */
4301       if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
4302         i.tm.opcode_modifier.commutative = 1;
           /* Clear every modifier that is_evex_encoding tests, apart
              from sae.  */
4303       i.tm.opcode_modifier.evex = 0;
4304       i.tm.opcode_modifier.masking = 0;
4305       i.tm.opcode_modifier.broadcast = 0;
4306       i.tm.opcode_modifier.disp8memshift = 0;
4307       i.memshift = 0;
           /* Recompute disp8 for the displacement operand found above,
              if any, now that i.memshift is zero.  */
4308       if (j < i.operands)
4309         i.types[j].bitfield.disp8
4310           = fits_in_disp8 (i.op[j].disps->X_add_number);
4311     }
4312 }
4313
4314 /* This is the guts of the machine-dependent assembler.  LINE points to a
4315    machine dependent instruction.  This function is supposed to emit
4316    the frags/bytes it assembles to.  */
4317
4318 void
4319 md_assemble (char *line)
4320 {
4321   unsigned int j;
4322   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4323   const insn_template *t;
4324
4325   /* Initialize globals.  */
4326   memset (&i, '\0', sizeof (i));
4327   for (j = 0; j < MAX_OPERANDS; j++)
4328     i.reloc[j] = NO_RELOC;
4329   memset (disp_expressions, '\0', sizeof (disp_expressions));
4330   memset (im_expressions, '\0', sizeof (im_expressions));
4331   save_stack_p = save_stack;
4332
4333   /* First parse an instruction mnemonic & call i386_operand for the operands.
4334      We assume that the scrubber has arranged it so that line[0] is the valid
4335      start of a (possibly prefixed) mnemonic.  */
4336
4337   line = parse_insn (line, mnemonic);
4338   if (line == NULL)
4339     return;
4340   mnem_suffix = i.suffix;
4341
4342   line = parse_operands (line, mnemonic);
4343   this_operand = -1;
4344   xfree (i.memop1_string);
4345   i.memop1_string = NULL;
4346   if (line == NULL)
4347     return;
4348
4349   /* Now we've parsed the mnemonic into a set of templates, and have the
4350      operands at hand.  */
4351
4352   /* All Intel opcodes have reversed operands except for "bound", "enter",
4353      "monitor*", "mwait*", "tpause", and "umwait".  We also don't reverse
4354      intersegment "jmp" and "call" instructions with 2 immediate operands so
4355      that the immediate segment precedes the offset, as it does when in AT&T
4356      mode.  */
4357   if (intel_syntax
4358       && i.operands > 1
4359       && (strcmp (mnemonic, "bound") != 0)
4360       && (strcmp (mnemonic, "invlpga") != 0)
4361       && (strncmp (mnemonic, "monitor", 7) != 0)
4362       && (strncmp (mnemonic, "mwait", 5) != 0)
4363       && (strcmp (mnemonic, "tpause") != 0)
4364       && (strcmp (mnemonic, "umwait") != 0)
4365       && !(operand_type_check (i.types[0], imm)
4366            && operand_type_check (i.types[1], imm)))
4367     swap_operands ();
4368
4369   /* The order of the immediates should be reversed
4370      for 2 immediates extrq and insertq instructions */
4371   if (i.imm_operands == 2
4372       && (strcmp (mnemonic, "extrq") == 0
4373           || strcmp (mnemonic, "insertq") == 0))
4374       swap_2_operands (0, 1);
4375
4376   if (i.imm_operands)
4377     optimize_imm ();
4378
4379   /* Don't optimize displacement for movabs since it only takes 64bit
4380      displacement.  */
4381   if (i.disp_operands
4382       && i.disp_encoding != disp_encoding_32bit
4383       && (flag_code != CODE_64BIT
4384           || strcmp (mnemonic, "movabs") != 0))
4385     optimize_disp ();
4386
4387   /* Next, we find a template that matches the given insn,
4388      making sure the overlap of the given operands types is consistent
4389      with the template operand types.  */
4390
4391   if (!(t = match_template (mnem_suffix)))
4392     return;
4393
4394   if (sse_check != check_none
4395       && !i.tm.opcode_modifier.noavx
4396       && !i.tm.cpu_flags.bitfield.cpuavx
4397       && !i.tm.cpu_flags.bitfield.cpuavx512f
4398       && (i.tm.cpu_flags.bitfield.cpusse
4399           || i.tm.cpu_flags.bitfield.cpusse2
4400           || i.tm.cpu_flags.bitfield.cpusse3
4401           || i.tm.cpu_flags.bitfield.cpussse3
4402           || i.tm.cpu_flags.bitfield.cpusse4_1
4403           || i.tm.cpu_flags.bitfield.cpusse4_2
4404           || i.tm.cpu_flags.bitfield.cpusse4a
4405           || i.tm.cpu_flags.bitfield.cpupclmul
4406           || i.tm.cpu_flags.bitfield.cpuaes
4407           || i.tm.cpu_flags.bitfield.cpusha
4408           || i.tm.cpu_flags.bitfield.cpugfni))
4409     {
4410       (sse_check == check_warning
4411        ? as_warn
4412        : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4413     }
4414
4415   if (i.tm.opcode_modifier.fwait)
4416     if (!add_prefix (FWAIT_OPCODE))
4417       return;
4418
4419   /* Check if REP prefix is OK.  */
4420   if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
4421     {
4422       as_bad (_("invalid instruction `%s' after `%s'"),
4423                 i.tm.name, i.rep_prefix);
4424       return;
4425     }
4426
4427   /* Check for lock without a lockable instruction.  Destination operand
4428      must be memory unless it is xchg (0x86).  */
4429   if (i.prefix[LOCK_PREFIX]
4430       && (!i.tm.opcode_modifier.islockable
4431           || i.mem_operands == 0
4432           || (i.tm.base_opcode != 0x86
4433               && !(i.flags[i.operands - 1] & Operand_Mem))))
4434     {
4435       as_bad (_("expecting lockable instruction after `lock'"));
4436       return;
4437     }
4438
4439   /* Check for data size prefix on VEX/XOP/EVEX encoded insns.  */
4440   if (i.prefix[DATA_PREFIX] && is_any_vex_encoding (&i.tm))
4441     {
4442       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
4443       return;
4444     }
4445
4446   /* Check if HLE prefix is OK.  */
4447   if (i.hle_prefix && !check_hle ())
4448     return;
4449
4450   /* Check BND prefix.  */
4451   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
4452     as_bad (_("expecting valid branch instruction after `bnd'"));
4453
4454   /* Check NOTRACK prefix.  */
4455   if (i.notrack_prefix && !i.tm.opcode_modifier.notrackprefixok)
4456     as_bad (_("expecting indirect branch instruction after `notrack'"));
4457
4458   if (i.tm.cpu_flags.bitfield.cpumpx)
4459     {
4460       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4461         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
4462       else if (flag_code != CODE_16BIT
4463                ? i.prefix[ADDR_PREFIX]
4464                : i.mem_operands && !i.prefix[ADDR_PREFIX])
4465         as_bad (_("16-bit address isn't allowed in MPX instructions"));
4466     }
4467
4468   /* Insert BND prefix.  */
4469   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
4470     {
4471       if (!i.prefix[BND_PREFIX])
4472         add_prefix (BND_PREFIX_OPCODE);
4473       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
4474         {
4475           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
4476           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
4477         }
4478     }
4479
4480   /* Check string instruction segment overrides.  */
4481   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
4482     {
4483       gas_assert (i.mem_operands);
4484       if (!check_string ())
4485         return;
4486       i.disp_operands = 0;
4487     }
4488
4489   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
4490     optimize_encoding ();
4491
4492   if (!process_suffix ())
4493     return;
4494
4495   /* Update operand types.  */
4496   for (j = 0; j < i.operands; j++)
4497     i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
4498
4499   /* Make still unresolved immediate matches conform to size of immediate
4500      given in i.suffix.  */
4501   if (!finalize_imm ())
4502     return;
4503
4504   if (i.types[0].bitfield.imm1)
4505     i.imm_operands = 0; /* kludge for shift insns.  */
4506
4507   /* We only need to check those implicit registers for instructions
4508      with 3 operands or less.  */
4509   if (i.operands <= 3)
4510     for (j = 0; j < i.operands; j++)
4511       if (i.types[j].bitfield.instance != InstanceNone
4512           && !i.types[j].bitfield.xmmword)
4513         i.reg_operands--;
4514
4515   /* ImmExt should be processed after SSE2AVX.  */
4516   if (!i.tm.opcode_modifier.sse2avx
4517       && i.tm.opcode_modifier.immext)
4518     process_immext ();
4519
4520   /* For insns with operands there are more diddles to do to the opcode.  */
4521   if (i.operands)
4522     {
4523       if (!process_operands ())
4524         return;
4525     }
4526   else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
4527     {
4528       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
4529       as_warn (_("translating to `%sp'"), i.tm.name);
4530     }
4531
4532   if (is_any_vex_encoding (&i.tm))
4533     {
4534       if (!cpu_arch_flags.bitfield.cpui286)
4535         {
4536           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
4537                   i.tm.name);
4538           return;
4539         }
4540
4541       if (i.tm.opcode_modifier.vex)
4542         build_vex_prefix (t);
4543       else
4544         build_evex_prefix ();
4545     }
4546
4547   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
4548      instructions may define INT_OPCODE as well, so avoid this corner
4549      case for those instructions that use MODRM.  */
4550   if (i.tm.base_opcode == INT_OPCODE
4551       && !i.tm.opcode_modifier.modrm
4552       && i.op[0].imms->X_add_number == 3)
4553     {
4554       i.tm.base_opcode = INT3_OPCODE;
4555       i.imm_operands = 0;
4556     }
4557
4558   if ((i.tm.opcode_modifier.jump == JUMP
4559        || i.tm.opcode_modifier.jump == JUMP_BYTE
4560        || i.tm.opcode_modifier.jump == JUMP_DWORD)
4561       && i.op[0].disps->X_op == O_constant)
4562     {
4563       /* Convert "jmp constant" (and "call constant") to a jump (call) to
4564          the absolute address given by the constant.  Since ix86 jumps and
4565          calls are pc relative, we need to generate a reloc.  */
4566       i.op[0].disps->X_add_symbol = &abs_symbol;
4567       i.op[0].disps->X_op = O_symbol;
4568     }
4569
4570   /* For 8 bit registers we need an empty rex prefix.  Also if the
4571      instruction already has a prefix, we need to convert old
4572      registers to new ones.  */
4573
4574   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
4575        && (i.op[0].regs->reg_flags & RegRex64) != 0)
4576       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
4577           && (i.op[1].regs->reg_flags & RegRex64) != 0)
4578       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
4579            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
4580           && i.rex != 0))
4581     {
4582       int x;
4583
4584       i.rex |= REX_OPCODE;
4585       for (x = 0; x < 2; x++)
4586         {
4587           /* Look for 8 bit operand that uses old registers.  */
4588           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4589               && (i.op[x].regs->reg_flags & RegRex64) == 0)
4590             {
4591               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4592               /* In case it is "hi" register, give up.  */
4593               if (i.op[x].regs->reg_num > 3)
4594                 as_bad (_("can't encode register '%s%s' in an "
4595                           "instruction requiring REX prefix."),
4596                         register_prefix, i.op[x].regs->reg_name);
4597
4598               /* Otherwise it is equivalent to the extended register.
4599                  Since the encoding doesn't change this is merely
4600                  cosmetic cleanup for debug output.  */
4601
4602               i.op[x].regs = i.op[x].regs + 8;
4603             }
4604         }
4605     }
4606
4607   if (i.rex == 0 && i.rex_encoding)
4608     {
4609       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
4610          that uses legacy register.  If it is "hi" register, don't add
4611          the REX_OPCODE byte.  */
4612       int x;
4613       for (x = 0; x < 2; x++)
4614         if (i.types[x].bitfield.class == Reg
4615             && i.types[x].bitfield.byte
4616             && (i.op[x].regs->reg_flags & RegRex64) == 0
4617             && i.op[x].regs->reg_num > 3)
4618           {
4619             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4620             i.rex_encoding = FALSE;
4621             break;
4622           }
4623
4624       if (i.rex_encoding)
4625         i.rex = REX_OPCODE;
4626     }
4627
4628   if (i.rex != 0)
4629     add_prefix (REX_OPCODE | i.rex);
4630
4631   /* We are ready to output the insn.  */
4632   output_insn ();
4633
4634   last_insn.seg = now_seg;
4635
4636   if (i.tm.opcode_modifier.isprefix)
4637     {
4638       last_insn.kind = last_insn_prefix;
4639       last_insn.name = i.tm.name;
4640       last_insn.file = as_where (&last_insn.line);
4641     }
4642   else
4643     last_insn.kind = last_insn_other;
4644 }
4645
4646 static char *
4647 parse_insn (char *line, char *mnemonic)
4648 {
4649   char *l = line;
4650   char *token_start = l;
4651   char *mnem_p;
4652   int supported;
4653   const insn_template *t;
4654   char *dot_p = NULL;
4655
4656   while (1)
4657     {
4658       mnem_p = mnemonic;
4659       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
4660         {
4661           if (*mnem_p == '.')
4662             dot_p = mnem_p;
4663           mnem_p++;
4664           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
4665             {
4666               as_bad (_("no such instruction: `%s'"), token_start);
4667               return NULL;
4668             }
4669           l++;
4670         }
4671       if (!is_space_char (*l)
4672           && *l != END_OF_INSN
4673           && (intel_syntax
4674               || (*l != PREFIX_SEPARATOR
4675                   && *l != ',')))
4676         {
4677           as_bad (_("invalid character %s in mnemonic"),
4678                   output_invalid (*l));
4679           return NULL;
4680         }
4681       if (token_start == l)
4682         {
4683           if (!intel_syntax && *l == PREFIX_SEPARATOR)
4684             as_bad (_("expecting prefix; got nothing"));
4685           else
4686             as_bad (_("expecting mnemonic; got nothing"));
4687           return NULL;
4688         }
4689
4690       /* Look up instruction (or prefix) via hash table.  */
4691       current_templates = (const templates *) hash_find (op_hash, mnemonic);
4692
4693       if (*l != END_OF_INSN
4694           && (!is_space_char (*l) || l[1] != END_OF_INSN)
4695           && current_templates
4696           && current_templates->start->opcode_modifier.isprefix)
4697         {
4698           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
4699             {
4700               as_bad ((flag_code != CODE_64BIT
4701                        ? _("`%s' is only supported in 64-bit mode")
4702                        : _("`%s' is not supported in 64-bit mode")),
4703                       current_templates->start->name);
4704               return NULL;
4705             }
4706           /* If we are in 16-bit mode, do not allow addr16 or data16.
4707              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
4708           if ((current_templates->start->opcode_modifier.size == SIZE16
4709                || current_templates->start->opcode_modifier.size == SIZE32)
4710               && flag_code != CODE_64BIT
4711               && ((current_templates->start->opcode_modifier.size == SIZE32)
4712                   ^ (flag_code == CODE_16BIT)))
4713             {
4714               as_bad (_("redundant %s prefix"),
4715                       current_templates->start->name);
4716               return NULL;
4717             }
4718           if (current_templates->start->opcode_length == 0)
4719             {
4720               /* Handle pseudo prefixes.  */
4721               switch (current_templates->start->base_opcode)
4722                 {
4723                 case 0x0:
4724                   /* {disp8} */
4725                   i.disp_encoding = disp_encoding_8bit;
4726                   break;
4727                 case 0x1:
4728                   /* {disp32} */
4729                   i.disp_encoding = disp_encoding_32bit;
4730                   break;
4731                 case 0x2:
4732                   /* {load} */
4733                   i.dir_encoding = dir_encoding_load;
4734                   break;
4735                 case 0x3:
4736                   /* {store} */
4737                   i.dir_encoding = dir_encoding_store;
4738                   break;
4739                 case 0x4:
4740                   /* {vex} */
4741                   i.vec_encoding = vex_encoding_vex;
4742                   break;
4743                 case 0x5:
4744                   /* {vex3} */
4745                   i.vec_encoding = vex_encoding_vex3;
4746                   break;
4747                 case 0x6:
4748                   /* {evex} */
4749                   i.vec_encoding = vex_encoding_evex;
4750                   break;
4751                 case 0x7:
4752                   /* {rex} */
4753                   i.rex_encoding = TRUE;
4754                   break;
4755                 case 0x8:
4756                   /* {nooptimize} */
4757                   i.no_optimize = TRUE;
4758                   break;
4759                 default:
4760                   abort ();
4761                 }
4762             }
4763           else
4764             {
4765               /* Add prefix, checking for repeated prefixes.  */
4766               switch (add_prefix (current_templates->start->base_opcode))
4767                 {
4768                 case PREFIX_EXIST:
4769                   return NULL;
4770                 case PREFIX_DS:
4771                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
4772                     i.notrack_prefix = current_templates->start->name;
4773                   break;
4774                 case PREFIX_REP:
4775                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
4776                     i.hle_prefix = current_templates->start->name;
4777                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
4778                     i.bnd_prefix = current_templates->start->name;
4779                   else
4780                     i.rep_prefix = current_templates->start->name;
4781                   break;
4782                 default:
4783                   break;
4784                 }
4785             }
4786           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
4787           token_start = ++l;
4788         }
4789       else
4790         break;
4791     }
4792
4793   if (!current_templates)
4794     {
4795       /* Deprecated functionality (new code should use pseudo-prefixes instead):
4796          Check if we should swap operand or force 32bit displacement in
4797          encoding.  */
4798       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
4799         i.dir_encoding = dir_encoding_swap;
4800       else if (mnem_p - 3 == dot_p
4801                && dot_p[1] == 'd'
4802                && dot_p[2] == '8')
4803         i.disp_encoding = disp_encoding_8bit;
4804       else if (mnem_p - 4 == dot_p
4805                && dot_p[1] == 'd'
4806                && dot_p[2] == '3'
4807                && dot_p[3] == '2')
4808         i.disp_encoding = disp_encoding_32bit;
4809       else
4810         goto check_suffix;
4811       mnem_p = dot_p;
4812       *dot_p = '\0';
4813       current_templates = (const templates *) hash_find (op_hash, mnemonic);
4814     }
4815
4816   if (!current_templates)
4817     {
4818     check_suffix:
4819       if (mnem_p > mnemonic)
4820         {
4821           /* See if we can get a match by trimming off a suffix.  */
4822           switch (mnem_p[-1])
4823             {
4824             case WORD_MNEM_SUFFIX:
4825               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
4826                 i.suffix = SHORT_MNEM_SUFFIX;
4827               else
4828                 /* Fall through.  */
4829               case BYTE_MNEM_SUFFIX:
4830               case QWORD_MNEM_SUFFIX:
4831                 i.suffix = mnem_p[-1];
4832               mnem_p[-1] = '\0';
4833               current_templates = (const templates *) hash_find (op_hash,
4834                                                                  mnemonic);
4835               break;
4836             case SHORT_MNEM_SUFFIX:
4837             case LONG_MNEM_SUFFIX:
4838               if (!intel_syntax)
4839                 {
4840                   i.suffix = mnem_p[-1];
4841                   mnem_p[-1] = '\0';
4842                   current_templates = (const templates *) hash_find (op_hash,
4843                                                                      mnemonic);
4844                 }
4845               break;
4846
4847               /* Intel Syntax.  */
4848             case 'd':
4849               if (intel_syntax)
4850                 {
4851                   if (intel_float_operand (mnemonic) == 1)
4852                     i.suffix = SHORT_MNEM_SUFFIX;
4853                   else
4854                     i.suffix = LONG_MNEM_SUFFIX;
4855                   mnem_p[-1] = '\0';
4856                   current_templates = (const templates *) hash_find (op_hash,
4857                                                                      mnemonic);
4858                 }
4859               break;
4860             }
4861         }
4862
4863       if (!current_templates)
4864         {
4865           as_bad (_("no such instruction: `%s'"), token_start);
4866           return NULL;
4867         }
4868     }
4869
4870   if (current_templates->start->opcode_modifier.jump == JUMP
4871       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
4872     {
4873       /* Check for a branch hint.  We allow ",pt" and ",pn" for
4874          predict taken and predict not taken respectively.
4875          I'm not sure that branch hints actually do anything on loop
4876          and jcxz insns (JumpByte) for current Pentium4 chips.  They
4877          may work in the future and it doesn't hurt to accept them
4878          now.  */
4879       if (l[0] == ',' && l[1] == 'p')
4880         {
4881           if (l[2] == 't')
4882             {
4883               if (!add_prefix (DS_PREFIX_OPCODE))
4884                 return NULL;
4885               l += 3;
4886             }
4887           else if (l[2] == 'n')
4888             {
4889               if (!add_prefix (CS_PREFIX_OPCODE))
4890                 return NULL;
4891               l += 3;
4892             }
4893         }
4894     }
4895   /* Any other comma loses.  */
4896   if (*l == ',')
4897     {
4898       as_bad (_("invalid character %s in mnemonic"),
4899               output_invalid (*l));
4900       return NULL;
4901     }
4902
4903   /* Check if instruction is supported on specified architecture.  */
4904   supported = 0;
4905   for (t = current_templates->start; t < current_templates->end; ++t)
4906     {
4907       supported |= cpu_flags_match (t);
4908       if (supported == CPU_FLAGS_PERFECT_MATCH)
4909         {
4910           if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
4911             as_warn (_("use .code16 to ensure correct addressing mode"));
4912
4913           return l;
4914         }
4915     }
4916
4917   if (!(supported & CPU_FLAGS_64BIT_MATCH))
4918     as_bad (flag_code == CODE_64BIT
4919             ? _("`%s' is not supported in 64-bit mode")
4920             : _("`%s' is only supported in 64-bit mode"),
4921             current_templates->start->name);
4922   else
4923     as_bad (_("`%s' is not supported on `%s%s'"),
4924             current_templates->start->name,
4925             cpu_arch_name ? cpu_arch_name : default_arch,
4926             cpu_sub_arch_name ? cpu_sub_arch_name : "");
4927
4928   return NULL;
4929 }
4930
4931 static char *
4932 parse_operands (char *l, const char *mnemonic)
4933 {
4934   char *token_start;
4935
4936   /* 1 if operand is pending after ','.  */
4937   unsigned int expecting_operand = 0;
4938
4939   /* Non-zero if operand parens not balanced.  */
4940   unsigned int paren_not_balanced;
4941
4942   while (*l != END_OF_INSN)
4943     {
4944       /* Skip optional white space before operand.  */
4945       if (is_space_char (*l))
4946         ++l;
4947       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
4948         {
4949           as_bad (_("invalid character %s before operand %d"),
4950                   output_invalid (*l),
4951                   i.operands + 1);
4952           return NULL;
4953         }
4954       token_start = l;  /* After white space.  */
4955       paren_not_balanced = 0;
4956       while (paren_not_balanced || *l != ',')
4957         {
4958           if (*l == END_OF_INSN)
4959             {
4960               if (paren_not_balanced)
4961                 {
4962                   if (!intel_syntax)
4963                     as_bad (_("unbalanced parenthesis in operand %d."),
4964                             i.operands + 1);
4965                   else
4966                     as_bad (_("unbalanced brackets in operand %d."),
4967                             i.operands + 1);
4968                   return NULL;
4969                 }
4970               else
4971                 break;  /* we are done */
4972             }
4973           else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
4974             {
4975               as_bad (_("invalid character %s in operand %d"),
4976                       output_invalid (*l),
4977                       i.operands + 1);
4978               return NULL;
4979             }
4980           if (!intel_syntax)
4981             {
4982               if (*l == '(')
4983                 ++paren_not_balanced;
4984               if (*l == ')')
4985                 --paren_not_balanced;
4986             }
4987           else
4988             {
4989               if (*l == '[')
4990                 ++paren_not_balanced;
4991               if (*l == ']')
4992                 --paren_not_balanced;
4993             }
4994           l++;
4995         }
4996       if (l != token_start)
4997         {                       /* Yes, we've read in another operand.  */
4998           unsigned int operand_ok;
4999           this_operand = i.operands++;
5000           if (i.operands > MAX_OPERANDS)
5001             {
5002               as_bad (_("spurious operands; (%d operands/instruction max)"),
5003                       MAX_OPERANDS);
5004               return NULL;
5005             }
5006           i.types[this_operand].bitfield.unspecified = 1;
5007           /* Now parse operand adding info to 'i' as we go along.  */
5008           END_STRING_AND_SAVE (l);
5009
5010           if (i.mem_operands > 1)
5011             {
5012               as_bad (_("too many memory references for `%s'"),
5013                       mnemonic);
5014               return 0;
5015             }
5016
5017           if (intel_syntax)
5018             operand_ok =
5019               i386_intel_operand (token_start,
5020                                   intel_float_operand (mnemonic));
5021           else
5022             operand_ok = i386_att_operand (token_start);
5023
5024           RESTORE_END_STRING (l);
5025           if (!operand_ok)
5026             return NULL;
5027         }
5028       else
5029         {
5030           if (expecting_operand)
5031             {
5032             expecting_operand_after_comma:
5033               as_bad (_("expecting operand after ','; got nothing"));
5034               return NULL;
5035             }
5036           if (*l == ',')
5037             {
5038               as_bad (_("expecting operand before ','; got nothing"));
5039               return NULL;
5040             }
5041         }
5042
5043       /* Now *l must be either ',' or END_OF_INSN.  */
5044       if (*l == ',')
5045         {
5046           if (*++l == END_OF_INSN)
5047             {
5048               /* Just skip it, if it's \n complain.  */
5049               goto expecting_operand_after_comma;
5050             }
5051           expecting_operand = 1;
5052         }
5053     }
5054   return l;
5055 }
5056
5057 static void
5058 swap_2_operands (int xchg1, int xchg2)
5059 {
5060   union i386_op temp_op;
5061   i386_operand_type temp_type;
5062   unsigned int temp_flags;
5063   enum bfd_reloc_code_real temp_reloc;
5064
5065   temp_type = i.types[xchg2];
5066   i.types[xchg2] = i.types[xchg1];
5067   i.types[xchg1] = temp_type;
5068
5069   temp_flags = i.flags[xchg2];
5070   i.flags[xchg2] = i.flags[xchg1];
5071   i.flags[xchg1] = temp_flags;
5072
5073   temp_op = i.op[xchg2];
5074   i.op[xchg2] = i.op[xchg1];
5075   i.op[xchg1] = temp_op;
5076
5077   temp_reloc = i.reloc[xchg2];
5078   i.reloc[xchg2] = i.reloc[xchg1];
5079   i.reloc[xchg1] = temp_reloc;
5080
5081   if (i.mask)
5082     {
5083       if (i.mask->operand == xchg1)
5084         i.mask->operand = xchg2;
5085       else if (i.mask->operand == xchg2)
5086         i.mask->operand = xchg1;
5087     }
5088   if (i.broadcast)
5089     {
5090       if (i.broadcast->operand == xchg1)
5091         i.broadcast->operand = xchg2;
5092       else if (i.broadcast->operand == xchg2)
5093         i.broadcast->operand = xchg1;
5094     }
5095   if (i.rounding)
5096     {
5097       if (i.rounding->operand == xchg1)
5098         i.rounding->operand = xchg2;
5099       else if (i.rounding->operand == xchg2)
5100         i.rounding->operand = xchg1;
5101     }
5102 }
5103
5104 static void
5105 swap_operands (void)
5106 {
5107   switch (i.operands)
5108     {
5109     case 5:
5110     case 4:
5111       swap_2_operands (1, i.operands - 2);
5112       /* Fall through.  */
5113     case 3:
5114     case 2:
5115       swap_2_operands (0, i.operands - 1);
5116       break;
5117     default:
5118       abort ();
5119     }
5120
5121   if (i.mem_operands == 2)
5122     {
5123       const seg_entry *temp_seg;
5124       temp_seg = i.seg[0];
5125       i.seg[0] = i.seg[1];
5126       i.seg[1] = temp_seg;
5127     }
5128 }
5129
5130 /* Try to ensure constant immediates are represented in the smallest
5131    opcode possible.  */
5132 static void
5133 optimize_imm (void)
5134 {
5135   char guess_suffix = 0;
5136   int op;
5137
5138   if (i.suffix)
5139     guess_suffix = i.suffix;
5140   else if (i.reg_operands)
5141     {
5142       /* Figure out a suffix from the last register operand specified.
5143          We can't do this properly yet, i.e. excluding special register
5144          instances, but the following works for instructions with
5145          immediates.  In any case, we can't set i.suffix yet.  */
5146       for (op = i.operands; --op >= 0;)
5147         if (i.types[op].bitfield.class != Reg)
5148           continue;
5149         else if (i.types[op].bitfield.byte)
5150           {
5151             guess_suffix = BYTE_MNEM_SUFFIX;
5152             break;
5153           }
5154         else if (i.types[op].bitfield.word)
5155           {
5156             guess_suffix = WORD_MNEM_SUFFIX;
5157             break;
5158           }
5159         else if (i.types[op].bitfield.dword)
5160           {
5161             guess_suffix = LONG_MNEM_SUFFIX;
5162             break;
5163           }
5164         else if (i.types[op].bitfield.qword)
5165           {
5166             guess_suffix = QWORD_MNEM_SUFFIX;
5167             break;
5168           }
5169     }
5170   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5171     guess_suffix = WORD_MNEM_SUFFIX;
5172
5173   for (op = i.operands; --op >= 0;)
5174     if (operand_type_check (i.types[op], imm))
5175       {
5176         switch (i.op[op].imms->X_op)
5177           {
5178           case O_constant:
5179             /* If a suffix is given, this operand may be shortened.  */
5180             switch (guess_suffix)
5181               {
5182               case LONG_MNEM_SUFFIX:
5183                 i.types[op].bitfield.imm32 = 1;
5184                 i.types[op].bitfield.imm64 = 1;
5185                 break;
5186               case WORD_MNEM_SUFFIX:
5187                 i.types[op].bitfield.imm16 = 1;
5188                 i.types[op].bitfield.imm32 = 1;
5189                 i.types[op].bitfield.imm32s = 1;
5190                 i.types[op].bitfield.imm64 = 1;
5191                 break;
5192               case BYTE_MNEM_SUFFIX:
5193                 i.types[op].bitfield.imm8 = 1;
5194                 i.types[op].bitfield.imm8s = 1;
5195                 i.types[op].bitfield.imm16 = 1;
5196                 i.types[op].bitfield.imm32 = 1;
5197                 i.types[op].bitfield.imm32s = 1;
5198                 i.types[op].bitfield.imm64 = 1;
5199                 break;
5200               }
5201
5202             /* If this operand is at most 16 bits, convert it
5203                to a signed 16 bit number before trying to see
5204                whether it will fit in an even smaller size.
5205                This allows a 16-bit operand such as $0xffe0 to
5206                be recognised as within Imm8S range.  */
5207             if ((i.types[op].bitfield.imm16)
5208                 && (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
5209               {
5210                 i.op[op].imms->X_add_number =
5211                   (((i.op[op].imms->X_add_number & 0xffff) ^ 0x8000) - 0x8000);
5212               }
5213 #ifdef BFD64
5214             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5215             if ((i.types[op].bitfield.imm32)
5216                 && ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
5217                     == 0))
5218               {
5219                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5220                                                 ^ ((offsetT) 1 << 31))
5221                                                - ((offsetT) 1 << 31));
5222               }
5223 #endif
5224             i.types[op]
5225               = operand_type_or (i.types[op],
5226                                  smallest_imm_type (i.op[op].imms->X_add_number));
5227
5228             /* We must avoid matching of Imm32 templates when 64bit
5229                only immediate is available.  */
5230             if (guess_suffix == QWORD_MNEM_SUFFIX)
5231               i.types[op].bitfield.imm32 = 0;
5232             break;
5233
5234           case O_absent:
5235           case O_register:
5236             abort ();
5237
5238             /* Symbols and expressions.  */
5239           default:
5240             /* Convert symbolic operand to proper sizes for matching, but don't
5241                prevent matching a set of insns that only supports sizes other
5242                than those matching the insn suffix.  */
5243             {
5244               i386_operand_type mask, allowed;
5245               const insn_template *t;
5246
5247               operand_type_set (&mask, 0);
5248               operand_type_set (&allowed, 0);
5249
5250               for (t = current_templates->start;
5251                    t < current_templates->end;
5252                    ++t)
5253                 {
5254                   allowed = operand_type_or (allowed, t->operand_types[op]);
5255                   allowed = operand_type_and (allowed, anyimm);
5256                 }
5257               switch (guess_suffix)
5258                 {
5259                 case QWORD_MNEM_SUFFIX:
5260                   mask.bitfield.imm64 = 1;
5261                   mask.bitfield.imm32s = 1;
5262                   break;
5263                 case LONG_MNEM_SUFFIX:
5264                   mask.bitfield.imm32 = 1;
5265                   break;
5266                 case WORD_MNEM_SUFFIX:
5267                   mask.bitfield.imm16 = 1;
5268                   break;
5269                 case BYTE_MNEM_SUFFIX:
5270                   mask.bitfield.imm8 = 1;
5271                   break;
5272                 default:
5273                   break;
5274                 }
5275               allowed = operand_type_and (mask, allowed);
5276               if (!operand_type_all_zero (&allowed))
5277                 i.types[op] = operand_type_and (i.types[op], mask);
5278             }
5279             break;
5280           }
5281       }
5282 }
5283
5284 /* Try to use the smallest displacement type too.  */
5285 static void
5286 optimize_disp (void)
5287 {
5288   int op;
5289
5290   for (op = i.operands; --op >= 0;)
5291     if (operand_type_check (i.types[op], disp))
5292       {
5293         if (i.op[op].disps->X_op == O_constant)
5294           {
5295             offsetT op_disp = i.op[op].disps->X_add_number;
5296
5297             if (i.types[op].bitfield.disp16
5298                 && (op_disp & ~(offsetT) 0xffff) == 0)
5299               {
5300                 /* If this operand is at most 16 bits, convert
5301                    to a signed 16 bit number and don't use 64bit
5302                    displacement.  */
5303                 op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
5304                 i.types[op].bitfield.disp64 = 0;
5305               }
5306 #ifdef BFD64
5307             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5308             if (i.types[op].bitfield.disp32
5309                 && (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
5310               {
5311                 /* If this operand is at most 32 bits, convert
5312                    to a signed 32 bit number and don't use 64bit
5313                    displacement.  */
5314                 op_disp &= (((offsetT) 2 << 31) - 1);
5315                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5316                 i.types[op].bitfield.disp64 = 0;
5317               }
5318 #endif
5319             if (!op_disp && i.types[op].bitfield.baseindex)
5320               {
5321                 i.types[op].bitfield.disp8 = 0;
5322                 i.types[op].bitfield.disp16 = 0;
5323                 i.types[op].bitfield.disp32 = 0;
5324                 i.types[op].bitfield.disp32s = 0;
5325                 i.types[op].bitfield.disp64 = 0;
5326                 i.op[op].disps = 0;
5327                 i.disp_operands--;
5328               }
5329             else if (flag_code == CODE_64BIT)
5330               {
5331                 if (fits_in_signed_long (op_disp))
5332                   {
5333                     i.types[op].bitfield.disp64 = 0;
5334                     i.types[op].bitfield.disp32s = 1;
5335                   }
5336                 if (i.prefix[ADDR_PREFIX]
5337                     && fits_in_unsigned_long (op_disp))
5338                   i.types[op].bitfield.disp32 = 1;
5339               }
5340             if ((i.types[op].bitfield.disp32
5341                  || i.types[op].bitfield.disp32s
5342                  || i.types[op].bitfield.disp16)
5343                 && fits_in_disp8 (op_disp))
5344               i.types[op].bitfield.disp8 = 1;
5345           }
5346         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5347                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5348           {
5349             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5350                          i.op[op].disps, 0, i.reloc[op]);
5351             i.types[op].bitfield.disp8 = 0;
5352             i.types[op].bitfield.disp16 = 0;
5353             i.types[op].bitfield.disp32 = 0;
5354             i.types[op].bitfield.disp32s = 0;
5355             i.types[op].bitfield.disp64 = 0;
5356           }
5357         else
5358           /* We only support 64bit displacement on constants.  */
5359           i.types[op].bitfield.disp64 = 0;
5360       }
5361 }
5362
5363 /* Return 1 if there is a match in broadcast bytes between operand
5364    GIVEN and instruction template T.   */
5365
5366 static INLINE int
5367 match_broadcast_size (const insn_template *t, unsigned int given)
5368 {
5369   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5370            && i.types[given].bitfield.byte)
5371           || (t->opcode_modifier.broadcast == WORD_BROADCAST
5372               && i.types[given].bitfield.word)
5373           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5374               && i.types[given].bitfield.dword)
5375           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5376               && i.types[given].bitfield.qword));
5377 }
5378
5379 /* Check if operands are valid for the instruction.  */
5380
5381 static int
5382 check_VecOperands (const insn_template *t)
5383 {
5384   unsigned int op;
5385   i386_cpu_flags cpu;
5386
5387   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
5388      any one operand are implicity requiring AVX512VL support if the actual
5389      operand size is YMMword or XMMword.  Since this function runs after
5390      template matching, there's no need to check for YMMword/XMMword in
5391      the template.  */
5392   cpu = cpu_flags_and (t->cpu_flags, avx512);
5393   if (!cpu_flags_all_zero (&cpu)
5394       && !t->cpu_flags.bitfield.cpuavx512vl
5395       && !cpu_arch_flags.bitfield.cpuavx512vl)
5396     {
5397       for (op = 0; op < t->operands; ++op)
5398         {
5399           if (t->operand_types[op].bitfield.zmmword
5400               && (i.types[op].bitfield.ymmword
5401                   || i.types[op].bitfield.xmmword))
5402             {
5403               i.error = unsupported;
5404               return 1;
5405             }
5406         }
5407     }
5408
5409   /* Without VSIB byte, we can't have a vector register for index.  */
5410   if (!t->opcode_modifier.vecsib
5411       && i.index_reg
5412       && (i.index_reg->reg_type.bitfield.xmmword
5413           || i.index_reg->reg_type.bitfield.ymmword
5414           || i.index_reg->reg_type.bitfield.zmmword))
5415     {
5416       i.error = unsupported_vector_index_register;
5417       return 1;
5418     }
5419
5420   /* Check if default mask is allowed.  */
5421   if (t->opcode_modifier.nodefmask
5422       && (!i.mask || i.mask->mask->reg_num == 0))
5423     {
5424       i.error = no_default_mask;
5425       return 1;
5426     }
5427
5428   /* For VSIB byte, we need a vector register for index, and all vector
5429      registers must be distinct.  */
5430   if (t->opcode_modifier.vecsib)
5431     {
5432       if (!i.index_reg
5433           || !((t->opcode_modifier.vecsib == VecSIB128
5434                 && i.index_reg->reg_type.bitfield.xmmword)
5435                || (t->opcode_modifier.vecsib == VecSIB256
5436                    && i.index_reg->reg_type.bitfield.ymmword)
5437                || (t->opcode_modifier.vecsib == VecSIB512
5438                    && i.index_reg->reg_type.bitfield.zmmword)))
5439       {
5440         i.error = invalid_vsib_address;
5441         return 1;
5442       }
5443
5444       gas_assert (i.reg_operands == 2 || i.mask);
5445       if (i.reg_operands == 2 && !i.mask)
5446         {
5447           gas_assert (i.types[0].bitfield.class == RegSIMD);
5448           gas_assert (i.types[0].bitfield.xmmword
5449                       || i.types[0].bitfield.ymmword);
5450           gas_assert (i.types[2].bitfield.class == RegSIMD);
5451           gas_assert (i.types[2].bitfield.xmmword
5452                       || i.types[2].bitfield.ymmword);
5453           if (operand_check == check_none)
5454             return 0;
5455           if (register_number (i.op[0].regs)
5456               != register_number (i.index_reg)
5457               && register_number (i.op[2].regs)
5458                  != register_number (i.index_reg)
5459               && register_number (i.op[0].regs)
5460                  != register_number (i.op[2].regs))
5461             return 0;
5462           if (operand_check == check_error)
5463             {
5464               i.error = invalid_vector_register_set;
5465               return 1;
5466             }
5467           as_warn (_("mask, index, and destination registers should be distinct"));
5468         }
5469       else if (i.reg_operands == 1 && i.mask)
5470         {
5471           if (i.types[1].bitfield.class == RegSIMD
5472               && (i.types[1].bitfield.xmmword
5473                   || i.types[1].bitfield.ymmword
5474                   || i.types[1].bitfield.zmmword)
5475               && (register_number (i.op[1].regs)
5476                   == register_number (i.index_reg)))
5477             {
5478               if (operand_check == check_error)
5479                 {
5480                   i.error = invalid_vector_register_set;
5481                   return 1;
5482                 }
5483               if (operand_check != check_none)
5484                 as_warn (_("index and destination registers should be distinct"));
5485             }
5486         }
5487     }
5488
5489   /* Check if broadcast is supported by the instruction and is applied
5490      to the memory operand.  */
5491   if (i.broadcast)
5492     {
5493       i386_operand_type type, overlap;
5494
5495       /* Check if specified broadcast is supported in this instruction,
5496          and its broadcast bytes match the memory operand.  */
5497       op = i.broadcast->operand;
5498       if (!t->opcode_modifier.broadcast
5499           || !(i.flags[op] & Operand_Mem)
5500           || (!i.types[op].bitfield.unspecified
5501               && !match_broadcast_size (t, op)))
5502         {
5503         bad_broadcast:
5504           i.error = unsupported_broadcast;
5505           return 1;
5506         }
5507
5508       i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1))
5509                             * i.broadcast->type);
5510       operand_type_set (&type, 0);
5511       switch (i.broadcast->bytes)
5512         {
5513         case 2:
5514           type.bitfield.word = 1;
5515           break;
5516         case 4:
5517           type.bitfield.dword = 1;
5518           break;
5519         case 8:
5520           type.bitfield.qword = 1;
5521           break;
5522         case 16:
5523           type.bitfield.xmmword = 1;
5524           break;
5525         case 32:
5526           type.bitfield.ymmword = 1;
5527           break;
5528         case 64:
5529           type.bitfield.zmmword = 1;
5530           break;
5531         default:
5532           goto bad_broadcast;
5533         }
5534
5535       overlap = operand_type_and (type, t->operand_types[op]);
5536       if (operand_type_all_zero (&overlap))
5537           goto bad_broadcast;
5538
5539       if (t->opcode_modifier.checkregsize)
5540         {
5541           unsigned int j;
5542
5543           type.bitfield.baseindex = 1;
5544           for (j = 0; j < i.operands; ++j)
5545             {
5546               if (j != op
5547                   && !operand_type_register_match(i.types[j],
5548                                                   t->operand_types[j],
5549                                                   type,
5550                                                   t->operand_types[op]))
5551                 goto bad_broadcast;
5552             }
5553         }
5554     }
5555   /* If broadcast is supported in this instruction, we need to check if
5556      operand of one-element size isn't specified without broadcast.  */
5557   else if (t->opcode_modifier.broadcast && i.mem_operands)
5558     {
5559       /* Find memory operand.  */
5560       for (op = 0; op < i.operands; op++)
5561         if (i.flags[op] & Operand_Mem)
5562           break;
5563       gas_assert (op < i.operands);
5564       /* Check size of the memory operand.  */
5565       if (match_broadcast_size (t, op))
5566         {
5567           i.error = broadcast_needed;
5568           return 1;
5569         }
5570     }
5571   else
5572     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
5573
5574   /* Check if requested masking is supported.  */
5575   if (i.mask)
5576     {
5577       switch (t->opcode_modifier.masking)
5578         {
5579         case BOTH_MASKING:
5580           break;
5581         case MERGING_MASKING:
5582           if (i.mask->zeroing)
5583             {
5584         case 0:
5585               i.error = unsupported_masking;
5586               return 1;
5587             }
5588           break;
5589         case DYNAMIC_MASKING:
5590           /* Memory destinations allow only merging masking.  */
5591           if (i.mask->zeroing && i.mem_operands)
5592             {
5593               /* Find memory operand.  */
5594               for (op = 0; op < i.operands; op++)
5595                 if (i.flags[op] & Operand_Mem)
5596                   break;
5597               gas_assert (op < i.operands);
5598               if (op == i.operands - 1)
5599                 {
5600                   i.error = unsupported_masking;
5601                   return 1;
5602                 }
5603             }
5604           break;
5605         default:
5606           abort ();
5607         }
5608     }
5609
5610   /* Check if masking is applied to dest operand.  */
5611   if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
5612     {
5613       i.error = mask_not_on_destination;
5614       return 1;
5615     }
5616
5617   /* Check RC/SAE.  */
5618   if (i.rounding)
5619     {
5620       if (!t->opcode_modifier.sae
5621           || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
5622         {
5623           i.error = unsupported_rc_sae;
5624           return 1;
5625         }
5626       /* If the instruction has several immediate operands and one of
5627          them is rounding, the rounding operand should be the last
5628          immediate operand.  */
5629       if (i.imm_operands > 1
5630           && i.rounding->operand != (int) (i.imm_operands - 1))
5631         {
5632           i.error = rc_sae_operand_not_last_imm;
5633           return 1;
5634         }
5635     }
5636
5637   /* Check vector Disp8 operand.  */
5638   if (t->opcode_modifier.disp8memshift
5639       && i.disp_encoding != disp_encoding_32bit)
5640     {
5641       if (i.broadcast)
5642         i.memshift = t->opcode_modifier.broadcast - 1;
5643       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
5644         i.memshift = t->opcode_modifier.disp8memshift;
5645       else
5646         {
5647           const i386_operand_type *type = NULL;
5648
5649           i.memshift = 0;
5650           for (op = 0; op < i.operands; op++)
5651             if (i.flags[op] & Operand_Mem)
5652               {
5653                 if (t->opcode_modifier.evex == EVEXLIG)
5654                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
5655                 else if (t->operand_types[op].bitfield.xmmword
5656                          + t->operand_types[op].bitfield.ymmword
5657                          + t->operand_types[op].bitfield.zmmword <= 1)
5658                   type = &t->operand_types[op];
5659                 else if (!i.types[op].bitfield.unspecified)
5660                   type = &i.types[op];
5661               }
5662             else if (i.types[op].bitfield.class == RegSIMD
5663                      && t->opcode_modifier.evex != EVEXLIG)
5664               {
5665                 if (i.types[op].bitfield.zmmword)
5666                   i.memshift = 6;
5667                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
5668                   i.memshift = 5;
5669                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
5670                   i.memshift = 4;
5671               }
5672
5673           if (type)
5674             {
5675               if (type->bitfield.zmmword)
5676                 i.memshift = 6;
5677               else if (type->bitfield.ymmword)
5678                 i.memshift = 5;
5679               else if (type->bitfield.xmmword)
5680                 i.memshift = 4;
5681             }
5682
5683           /* For the check in fits_in_disp8().  */
5684           if (i.memshift == 0)
5685             i.memshift = -1;
5686         }
5687
5688       for (op = 0; op < i.operands; op++)
5689         if (operand_type_check (i.types[op], disp)
5690             && i.op[op].disps->X_op == O_constant)
5691           {
5692             if (fits_in_disp8 (i.op[op].disps->X_add_number))
5693               {
5694                 i.types[op].bitfield.disp8 = 1;
5695                 return 0;
5696               }
5697             i.types[op].bitfield.disp8 = 0;
5698           }
5699     }
5700
5701   i.memshift = 0;
5702
5703   return 0;
5704 }
5705
5706 /* Check if operands are valid for the instruction.  Update VEX
5707    operand types.  */
5708
5709 static int
5710 VEX_check_operands (const insn_template *t)
5711 {
5712   if (i.vec_encoding == vex_encoding_evex)
5713     {
5714       /* This instruction must be encoded with EVEX prefix.  */
5715       if (!is_evex_encoding (t))
5716         {
5717           i.error = unsupported;
5718           return 1;
5719         }
5720       return 0;
5721     }
5722
5723   if (!t->opcode_modifier.vex)
5724     {
5725       /* This instruction template doesn't have VEX prefix.  */
5726       if (i.vec_encoding != vex_encoding_default)
5727         {
5728           i.error = unsupported;
5729           return 1;
5730         }
5731       return 0;
5732     }
5733
5734   /* Check the special Imm4 cases; must be the first operand.  */
5735   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
5736     {
5737       if (i.op[0].imms->X_op != O_constant
5738           || !fits_in_imm4 (i.op[0].imms->X_add_number))
5739         {
5740           i.error = bad_imm4;
5741           return 1;
5742         }
5743
5744       /* Turn off Imm<N> so that update_imm won't complain.  */
5745       operand_type_set (&i.types[0], 0);
5746     }
5747
5748   return 0;
5749 }
5750
5751 static const insn_template *
5752 match_template (char mnem_suffix)
5753 {
5754   /* Points to template once we've found it.  */
5755   const insn_template *t;
5756   i386_operand_type overlap0, overlap1, overlap2, overlap3;
5757   i386_operand_type overlap4;
5758   unsigned int found_reverse_match;
5759   i386_opcode_modifier suffix_check;
5760   i386_operand_type operand_types [MAX_OPERANDS];
5761   int addr_prefix_disp;
5762   unsigned int j, size_match, check_register;
5763   enum i386_error specific_error = 0;
5764
5765 #if MAX_OPERANDS != 5
5766 # error "MAX_OPERANDS must be 5."
5767 #endif
5768
5769   found_reverse_match = 0;
5770   addr_prefix_disp = -1;
5771
5772   /* Prepare for mnemonic suffix check.  */
5773   memset (&suffix_check, 0, sizeof (suffix_check));
5774   switch (mnem_suffix)
5775     {
5776     case BYTE_MNEM_SUFFIX:
5777       suffix_check.no_bsuf = 1;
5778       break;
5779     case WORD_MNEM_SUFFIX:
5780       suffix_check.no_wsuf = 1;
5781       break;
5782     case SHORT_MNEM_SUFFIX:
5783       suffix_check.no_ssuf = 1;
5784       break;
5785     case LONG_MNEM_SUFFIX:
5786       suffix_check.no_lsuf = 1;
5787       break;
5788     case QWORD_MNEM_SUFFIX:
5789       suffix_check.no_qsuf = 1;
5790       break;
5791     default:
5792       /* NB: In Intel syntax, normally we can check for memory operand
5793          size when there is no mnemonic suffix.  But jmp and call have
5794          2 different encodings with Dword memory operand size, one with
5795          No_ldSuf and the other without.  i.suffix is set to
5796          LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
5797       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
5798         suffix_check.no_ldsuf = 1;
5799     }
5800
5801   /* Must have right number of operands.  */
5802   i.error = number_of_operands_mismatch;
5803
5804   for (t = current_templates->start; t < current_templates->end; t++)
5805     {
5806       addr_prefix_disp = -1;
5807       found_reverse_match = 0;
5808
5809       if (i.operands != t->operands)
5810         continue;
5811
5812       /* Check processor support.  */
5813       i.error = unsupported;
5814       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
5815         continue;
5816
5817       /* Check AT&T mnemonic.   */
5818       i.error = unsupported_with_intel_mnemonic;
5819       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
5820         continue;
5821
5822       /* Check AT&T/Intel syntax.  */
5823       i.error = unsupported_syntax;
5824       if ((intel_syntax && t->opcode_modifier.attsyntax)
5825           || (!intel_syntax && t->opcode_modifier.intelsyntax))
5826         continue;
5827
5828       /* Check Intel64/AMD64 ISA.   */
5829       switch (isa64)
5830         {
5831         default:
5832           /* Default: Don't accept Intel64.  */
5833           if (t->opcode_modifier.isa64 == INTEL64)
5834             continue;
5835           break;
5836         case amd64:
5837           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
5838           if (t->opcode_modifier.isa64 >= INTEL64)
5839             continue;
5840           break;
5841         case intel64:
5842           /* -mintel64: Don't accept AMD64.  */
5843           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
5844             continue;
5845           break;
5846         }
5847
5848       /* Check the suffix.  */
5849       i.error = invalid_instruction_suffix;
5850       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
5851           || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
5852           || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
5853           || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
5854           || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
5855           || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
5856         continue;
5857
5858       size_match = operand_size_match (t);
5859       if (!size_match)
5860         continue;
5861
5862       /* This is intentionally not
5863
5864          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
5865
5866          as the case of a missing * on the operand is accepted (perhaps with
5867          a warning, issued further down).  */
5868       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
5869         {
5870           i.error = operand_type_mismatch;
5871           continue;
5872         }
5873
5874       for (j = 0; j < MAX_OPERANDS; j++)
5875         operand_types[j] = t->operand_types[j];
5876
5877       /* In general, don't allow
5878          - 64-bit operands outside of 64-bit mode,
5879          - 32-bit operands on pre-386.  */
5880       j = i.imm_operands + (t->operands > i.imm_operands + 1);
5881       if (((i.suffix == QWORD_MNEM_SUFFIX
5882             && flag_code != CODE_64BIT
5883             && (t->base_opcode != 0x0fc7
5884                 || t->extension_opcode != 1 /* cmpxchg8b */))
5885            || (i.suffix == LONG_MNEM_SUFFIX
5886                && !cpu_arch_flags.bitfield.cpui386))
5887           && (intel_syntax
5888               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
5889                  && !intel_float_operand (t->name))
5890               : intel_float_operand (t->name) != 2)
5891           && (t->operands == i.imm_operands
5892               || (operand_types[i.imm_operands].bitfield.class != RegMMX
5893                && operand_types[i.imm_operands].bitfield.class != RegSIMD
5894                && operand_types[i.imm_operands].bitfield.class != RegMask)
5895               || (operand_types[j].bitfield.class != RegMMX
5896                   && operand_types[j].bitfield.class != RegSIMD
5897                   && operand_types[j].bitfield.class != RegMask))
5898           && !t->opcode_modifier.vecsib)
5899         continue;
5900
5901       /* Do not verify operands when there are none.  */
5902       if (!t->operands)
5903         /* We've found a match; break out of loop.  */
5904         break;
5905
5906       if (!t->opcode_modifier.jump
5907           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
5908         {
5909           /* There should be only one Disp operand.  */
5910           for (j = 0; j < MAX_OPERANDS; j++)
5911             if (operand_type_check (operand_types[j], disp))
5912               break;
5913           if (j < MAX_OPERANDS)
5914             {
5915               bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
5916
5917               addr_prefix_disp = j;
5918
5919               /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
5920                  operand into Disp32/Disp32/Disp16/Disp32 operand.  */
5921               switch (flag_code)
5922                 {
5923                 case CODE_16BIT:
5924                   override = !override;
5925                   /* Fall through.  */
5926                 case CODE_32BIT:
5927                   if (operand_types[j].bitfield.disp32
5928                       && operand_types[j].bitfield.disp16)
5929                     {
5930                       operand_types[j].bitfield.disp16 = override;
5931                       operand_types[j].bitfield.disp32 = !override;
5932                     }
5933                   operand_types[j].bitfield.disp32s = 0;
5934                   operand_types[j].bitfield.disp64 = 0;
5935                   break;
5936
5937                 case CODE_64BIT:
5938                   if (operand_types[j].bitfield.disp32s
5939                       || operand_types[j].bitfield.disp64)
5940                     {
5941                       operand_types[j].bitfield.disp64 &= !override;
5942                       operand_types[j].bitfield.disp32s &= !override;
5943                       operand_types[j].bitfield.disp32 = override;
5944                     }
5945                   operand_types[j].bitfield.disp16 = 0;
5946                   break;
5947                 }
5948             }
5949         }
5950
5951       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
5952       if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
5953         continue;
5954
5955       /* We check register size if needed.  */
5956       if (t->opcode_modifier.checkregsize)
5957         {
5958           check_register = (1 << t->operands) - 1;
5959           if (i.broadcast)
5960             check_register &= ~(1 << i.broadcast->operand);
5961         }
5962       else
5963         check_register = 0;
5964
5965       overlap0 = operand_type_and (i.types[0], operand_types[0]);
5966       switch (t->operands)
5967         {
5968         case 1:
5969           if (!operand_type_match (overlap0, i.types[0]))
5970             continue;
5971           break;
5972         case 2:
5973           /* xchg %eax, %eax is a special case. It is an alias for nop
5974              only in 32bit mode and we can use opcode 0x90.  In 64bit
5975              mode, we can't use 0x90 for xchg %eax, %eax since it should
5976              zero-extend %eax to %rax.  */
5977           if (flag_code == CODE_64BIT
5978               && t->base_opcode == 0x90
5979               && i.types[0].bitfield.instance == Accum
5980               && i.types[0].bitfield.dword
5981               && i.types[1].bitfield.instance == Accum
5982               && i.types[1].bitfield.dword)
5983             continue;
5984           /* xrelease mov %eax, <disp> is another special case. It must not
5985              match the accumulator-only encoding of mov.  */
5986           if (flag_code != CODE_64BIT
5987               && i.hle_prefix
5988               && t->base_opcode == 0xa0
5989               && i.types[0].bitfield.instance == Accum
5990               && (i.flags[1] & Operand_Mem))
5991             continue;
5992           /* Fall through.  */
5993
5994         case 3:
5995           if (!(size_match & MATCH_STRAIGHT))
5996             goto check_reverse;
5997           /* Reverse direction of operands if swapping is possible in the first
5998              place (operands need to be symmetric) and
5999              - the load form is requested, and the template is a store form,
6000              - the store form is requested, and the template is a load form,
6001              - the non-default (swapped) form is requested.  */
6002           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6003           if (t->opcode_modifier.d && i.reg_operands == i.operands
6004               && !operand_type_all_zero (&overlap1))
6005             switch (i.dir_encoding)
6006               {
6007               case dir_encoding_load:
6008                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6009                     || t->opcode_modifier.regmem)
6010                   goto check_reverse;
6011                 break;
6012
6013               case dir_encoding_store:
6014                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6015                     && !t->opcode_modifier.regmem)
6016                   goto check_reverse;
6017                 break;
6018
6019               case dir_encoding_swap:
6020                 goto check_reverse;
6021
6022               case dir_encoding_default:
6023                 break;
6024               }
6025           /* If we want store form, we skip the current load.  */
6026           if ((i.dir_encoding == dir_encoding_store
6027                || i.dir_encoding == dir_encoding_swap)
6028               && i.mem_operands == 0
6029               && t->opcode_modifier.load)
6030             continue;
6031           /* Fall through.  */
6032         case 4:
6033         case 5:
6034           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6035           if (!operand_type_match (overlap0, i.types[0])
6036               || !operand_type_match (overlap1, i.types[1])
6037               || ((check_register & 3) == 3
6038                   && !operand_type_register_match (i.types[0],
6039                                                    operand_types[0],
6040                                                    i.types[1],
6041                                                    operand_types[1])))
6042             {
6043               /* Check if other direction is valid ...  */
6044               if (!t->opcode_modifier.d)
6045                 continue;
6046
6047             check_reverse:
6048               if (!(size_match & MATCH_REVERSE))
6049                 continue;
6050               /* Try reversing direction of operands.  */
6051               overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6052               overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6053               if (!operand_type_match (overlap0, i.types[0])
6054                   || !operand_type_match (overlap1, i.types[i.operands - 1])
6055                   || (check_register
6056                       && !operand_type_register_match (i.types[0],
6057                                                        operand_types[i.operands - 1],
6058                                                        i.types[i.operands - 1],
6059                                                        operand_types[0])))
6060                 {
6061                   /* Does not match either direction.  */
6062                   continue;
6063                 }
6064               /* found_reverse_match holds which of D or FloatR
6065                  we've found.  */
6066               if (!t->opcode_modifier.d)
6067                 found_reverse_match = 0;
6068               else if (operand_types[0].bitfield.tbyte)
6069                 found_reverse_match = Opcode_FloatD;
6070               else if (operand_types[0].bitfield.xmmword
6071                        || operand_types[i.operands - 1].bitfield.xmmword
6072                        || operand_types[0].bitfield.class == RegMMX
6073                        || operand_types[i.operands - 1].bitfield.class == RegMMX
6074                        || is_any_vex_encoding(t))
6075                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6076                                       ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6077               else
6078                 found_reverse_match = Opcode_D;
6079               if (t->opcode_modifier.floatr)
6080                 found_reverse_match |= Opcode_FloatR;
6081             }
6082           else
6083             {
6084               /* Found a forward 2 operand match here.  */
6085               switch (t->operands)
6086                 {
6087                 case 5:
6088                   overlap4 = operand_type_and (i.types[4],
6089                                                operand_types[4]);
6090                   /* Fall through.  */
6091                 case 4:
6092                   overlap3 = operand_type_and (i.types[3],
6093                                                operand_types[3]);
6094                   /* Fall through.  */
6095                 case 3:
6096                   overlap2 = operand_type_and (i.types[2],
6097                                                operand_types[2]);
6098                   break;
6099                 }
6100
6101               switch (t->operands)
6102                 {
6103                 case 5:
6104                   if (!operand_type_match (overlap4, i.types[4])
6105                       || !operand_type_register_match (i.types[3],
6106                                                        operand_types[3],
6107                                                        i.types[4],
6108                                                        operand_types[4]))
6109                     continue;
6110                   /* Fall through.  */
6111                 case 4:
6112                   if (!operand_type_match (overlap3, i.types[3])
6113                       || ((check_register & 0xa) == 0xa
6114                           && !operand_type_register_match (i.types[1],
6115                                                             operand_types[1],
6116                                                             i.types[3],
6117                                                             operand_types[3]))
6118                       || ((check_register & 0xc) == 0xc
6119                           && !operand_type_register_match (i.types[2],
6120                                                             operand_types[2],
6121                                                             i.types[3],
6122                                                             operand_types[3])))
6123                     continue;
6124                   /* Fall through.  */
6125                 case 3:
6126                   /* Here we make use of the fact that there are no
6127                      reverse match 3 operand instructions.  */
6128                   if (!operand_type_match (overlap2, i.types[2])
6129                       || ((check_register & 5) == 5
6130                           && !operand_type_register_match (i.types[0],
6131                                                             operand_types[0],
6132                                                             i.types[2],
6133                                                             operand_types[2]))
6134                       || ((check_register & 6) == 6
6135                           && !operand_type_register_match (i.types[1],
6136                                                             operand_types[1],
6137                                                             i.types[2],
6138                                                             operand_types[2])))
6139                     continue;
6140                   break;
6141                 }
6142             }
6143           /* Found either forward/reverse 2, 3 or 4 operand match here:
6144              slip through to break.  */
6145         }
6146
6147       /* Check if vector and VEX operands are valid.  */
6148       if (check_VecOperands (t) || VEX_check_operands (t))
6149         {
6150           specific_error = i.error;
6151           continue;
6152         }
6153
6154       /* We've found a match; break out of loop.  */
6155       break;
6156     }
6157
6158   if (t == current_templates->end)
6159     {
6160       /* We found no match.  */
6161       const char *err_msg;
6162       switch (specific_error ? specific_error : i.error)
6163         {
6164         default:
6165           abort ();
6166         case operand_size_mismatch:
6167           err_msg = _("operand size mismatch");
6168           break;
6169         case operand_type_mismatch:
6170           err_msg = _("operand type mismatch");
6171           break;
6172         case register_type_mismatch:
6173           err_msg = _("register type mismatch");
6174           break;
6175         case number_of_operands_mismatch:
6176           err_msg = _("number of operands mismatch");
6177           break;
6178         case invalid_instruction_suffix:
6179           err_msg = _("invalid instruction suffix");
6180           break;
6181         case bad_imm4:
6182           err_msg = _("constant doesn't fit in 4 bits");
6183           break;
6184         case unsupported_with_intel_mnemonic:
6185           err_msg = _("unsupported with Intel mnemonic");
6186           break;
6187         case unsupported_syntax:
6188           err_msg = _("unsupported syntax");
6189           break;
6190         case unsupported:
6191           as_bad (_("unsupported instruction `%s'"),
6192                   current_templates->start->name);
6193           return NULL;
6194         case invalid_vsib_address:
6195           err_msg = _("invalid VSIB address");
6196           break;
6197         case invalid_vector_register_set:
6198           err_msg = _("mask, index, and destination registers must be distinct");
6199           break;
6200         case unsupported_vector_index_register:
6201           err_msg = _("unsupported vector index register");
6202           break;
6203         case unsupported_broadcast:
6204           err_msg = _("unsupported broadcast");
6205           break;
6206         case broadcast_needed:
6207           err_msg = _("broadcast is needed for operand of such type");
6208           break;
6209         case unsupported_masking:
6210           err_msg = _("unsupported masking");
6211           break;
6212         case mask_not_on_destination:
6213           err_msg = _("mask not on destination operand");
6214           break;
6215         case no_default_mask:
6216           err_msg = _("default mask isn't allowed");
6217           break;
6218         case unsupported_rc_sae:
6219           err_msg = _("unsupported static rounding/sae");
6220           break;
6221         case rc_sae_operand_not_last_imm:
6222           if (intel_syntax)
6223             err_msg = _("RC/SAE operand must precede immediate operands");
6224           else
6225             err_msg = _("RC/SAE operand must follow immediate operands");
6226           break;
6227         case invalid_register_operand:
6228           err_msg = _("invalid register operand");
6229           break;
6230         }
6231       as_bad (_("%s for `%s'"), err_msg,
6232               current_templates->start->name);
6233       return NULL;
6234     }
6235
6236   if (!quiet_warnings)
6237     {
6238       if (!intel_syntax
6239           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6240         as_warn (_("indirect %s without `*'"), t->name);
6241
6242       if (t->opcode_modifier.isprefix
6243           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6244         {
6245           /* Warn them that a data or address size prefix doesn't
6246              affect assembly of the next line of code.  */
6247           as_warn (_("stand-alone `%s' prefix"), t->name);
6248         }
6249     }
6250
6251   /* Copy the template we found.  */
6252   i.tm = *t;
6253
6254   if (addr_prefix_disp != -1)
6255     i.tm.operand_types[addr_prefix_disp]
6256       = operand_types[addr_prefix_disp];
6257
6258   if (found_reverse_match)
6259     {
6260       /* If we found a reverse match we must alter the opcode direction
6261          bit and clear/flip the regmem modifier one.  found_reverse_match
6262          holds bits to change (different for int & float insns).  */
6263
6264       i.tm.base_opcode ^= found_reverse_match;
6265
6266       i.tm.operand_types[0] = operand_types[i.operands - 1];
6267       i.tm.operand_types[i.operands - 1] = operand_types[0];
6268
6269       /* Certain SIMD insns have their load forms specified in the opcode
6270          table, and hence we need to _set_ RegMem instead of clearing it.
6271          We need to avoid setting the bit though on insns like KMOVW.  */
6272       i.tm.opcode_modifier.regmem
6273         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6274           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6275           && !i.tm.opcode_modifier.regmem;
6276     }
6277
6278   return t;
6279 }
6280
6281 static int
6282 check_string (void)
6283 {
6284   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6285   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6286
6287   if (i.seg[op] != NULL && i.seg[op] != &es)
6288     {
6289       as_bad (_("`%s' operand %u must use `%ses' segment"),
6290               i.tm.name,
6291               intel_syntax ? i.tm.operands - es_op : es_op + 1,
6292               register_prefix);
6293       return 0;
6294     }
6295
6296   /* There's only ever one segment override allowed per instruction.
6297      This instruction possibly has a legal segment override on the
6298      second operand, so copy the segment to where non-string
6299      instructions store it, allowing common code.  */
6300   i.seg[op] = i.seg[1];
6301
6302   return 1;
6303 }
6304
6305 static int
6306 process_suffix (void)
6307 {
6308   /* If matched instruction specifies an explicit instruction mnemonic
6309      suffix, use it.  */
6310   if (i.tm.opcode_modifier.size == SIZE16)
6311     i.suffix = WORD_MNEM_SUFFIX;
6312   else if (i.tm.opcode_modifier.size == SIZE32)
6313     i.suffix = LONG_MNEM_SUFFIX;
6314   else if (i.tm.opcode_modifier.size == SIZE64)
6315     i.suffix = QWORD_MNEM_SUFFIX;
6316   else if (i.reg_operands
6317            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
6318            && !i.tm.opcode_modifier.addrprefixopreg)
6319     {
6320       unsigned int numop = i.operands;
6321
6322       /* movsx/movzx want only their source operand considered here, for the
6323          ambiguity checking below.  The suffix will be replaced afterwards
6324          to represent the destination (register).  */
6325       if (((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w)
6326           || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
6327         --i.operands;
6328
6329       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
6330       if (i.tm.base_opcode == 0xf20f38f0
6331           && i.tm.operand_types[1].bitfield.qword)
6332         i.rex |= REX_W;
6333
6334       /* If there's no instruction mnemonic suffix we try to invent one
6335          based on GPR operands.  */
6336       if (!i.suffix)
6337         {
6338           /* We take i.suffix from the last register operand specified,
6339              Destination register type is more significant than source
6340              register type.  crc32 in SSE4.2 prefers source register
6341              type. */
6342           unsigned int op = i.tm.base_opcode != 0xf20f38f0 ? i.operands : 1;
6343
6344           while (op--)
6345             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
6346                 || i.tm.operand_types[op].bitfield.instance == Accum)
6347               {
6348                 if (i.types[op].bitfield.class != Reg)
6349                   continue;
6350                 if (i.types[op].bitfield.byte)
6351                   i.suffix = BYTE_MNEM_SUFFIX;
6352                 else if (i.types[op].bitfield.word)
6353                   i.suffix = WORD_MNEM_SUFFIX;
6354                 else if (i.types[op].bitfield.dword)
6355                   i.suffix = LONG_MNEM_SUFFIX;
6356                 else if (i.types[op].bitfield.qword)
6357                   i.suffix = QWORD_MNEM_SUFFIX;
6358                 else
6359                   continue;
6360                 break;
6361               }
6362
6363           /* As an exception, movsx/movzx silently default to a byte source
6364              in AT&T mode.  */
6365           if ((i.tm.base_opcode | 8) == 0xfbe && i.tm.opcode_modifier.w
6366               && !i.suffix && !intel_syntax)
6367             i.suffix = BYTE_MNEM_SUFFIX;
6368         }
6369       else if (i.suffix == BYTE_MNEM_SUFFIX)
6370         {
6371           if (intel_syntax
6372               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6373               && i.tm.opcode_modifier.no_bsuf)
6374             i.suffix = 0;
6375           else if (!check_byte_reg ())
6376             return 0;
6377         }
6378       else if (i.suffix == LONG_MNEM_SUFFIX)
6379         {
6380           if (intel_syntax
6381               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6382               && i.tm.opcode_modifier.no_lsuf
6383               && !i.tm.opcode_modifier.todword
6384               && !i.tm.opcode_modifier.toqword)
6385             i.suffix = 0;
6386           else if (!check_long_reg ())
6387             return 0;
6388         }
6389       else if (i.suffix == QWORD_MNEM_SUFFIX)
6390         {
6391           if (intel_syntax
6392               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6393               && i.tm.opcode_modifier.no_qsuf
6394               && !i.tm.opcode_modifier.todword
6395               && !i.tm.opcode_modifier.toqword)
6396             i.suffix = 0;
6397           else if (!check_qword_reg ())
6398             return 0;
6399         }
6400       else if (i.suffix == WORD_MNEM_SUFFIX)
6401         {
6402           if (intel_syntax
6403               && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE
6404               && i.tm.opcode_modifier.no_wsuf)
6405             i.suffix = 0;
6406           else if (!check_word_reg ())
6407             return 0;
6408         }
6409       else if (intel_syntax
6410                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
6411         /* Do nothing if the instruction is going to ignore the prefix.  */
6412         ;
6413       else
6414         abort ();
6415
6416       /* Undo the movsx/movzx change done above.  */
6417       i.operands = numop;
6418     }
6419   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
6420            && !i.suffix)
6421     {
6422       i.suffix = stackop_size;
6423       if (stackop_size == LONG_MNEM_SUFFIX)
6424         {
6425           /* stackop_size is set to LONG_MNEM_SUFFIX for the
6426              .code16gcc directive to support 16-bit mode with
6427              32-bit address.  For IRET without a suffix, generate
6428              16-bit IRET (opcode 0xcf) to return from an interrupt
6429              handler.  */
6430           if (i.tm.base_opcode == 0xcf)
6431             {
6432               i.suffix = WORD_MNEM_SUFFIX;
6433               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
6434             }
6435           /* Warn about changed behavior for segment register push/pop.  */
6436           else if ((i.tm.base_opcode | 1) == 0x07)
6437             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
6438                      i.tm.name);
6439         }
6440     }
6441   else if (!i.suffix
6442            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
6443                || i.tm.opcode_modifier.jump == JUMP_BYTE
6444                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
6445                || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
6446                    && i.tm.extension_opcode <= 3)))
6447     {
6448       switch (flag_code)
6449         {
6450         case CODE_64BIT:
6451           if (!i.tm.opcode_modifier.no_qsuf)
6452             {
6453               i.suffix = QWORD_MNEM_SUFFIX;
6454               break;
6455             }
6456           /* Fall through.  */
6457         case CODE_32BIT:
6458           if (!i.tm.opcode_modifier.no_lsuf)
6459             i.suffix = LONG_MNEM_SUFFIX;
6460           break;
6461         case CODE_16BIT:
6462           if (!i.tm.opcode_modifier.no_wsuf)
6463             i.suffix = WORD_MNEM_SUFFIX;
6464           break;
6465         }
6466     }
6467
6468   if (!i.suffix
6469       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
6470           /* Also cover lret/retf/iret in 64-bit mode.  */
6471           || (flag_code == CODE_64BIT
6472               && !i.tm.opcode_modifier.no_lsuf
6473               && !i.tm.opcode_modifier.no_qsuf))
6474       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
6475       /* Accept FLDENV et al without suffix.  */
6476       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
6477     {
6478       unsigned int suffixes, evex = 0;
6479
6480       suffixes = !i.tm.opcode_modifier.no_bsuf;
6481       if (!i.tm.opcode_modifier.no_wsuf)
6482         suffixes |= 1 << 1;
6483       if (!i.tm.opcode_modifier.no_lsuf)
6484         suffixes |= 1 << 2;
6485       if (!i.tm.opcode_modifier.no_ldsuf)
6486         suffixes |= 1 << 3;
6487       if (!i.tm.opcode_modifier.no_ssuf)
6488         suffixes |= 1 << 4;
6489       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
6490         suffixes |= 1 << 5;
6491
6492       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
6493          also suitable for AT&T syntax mode, it was requested that this be
6494          restricted to just Intel syntax.  */
6495       if (intel_syntax && is_any_vex_encoding (&i.tm) && !i.broadcast)
6496         {
6497           unsigned int op;
6498
6499           for (op = 0; op < i.tm.operands; ++op)
6500             {
6501               if (is_evex_encoding (&i.tm)
6502                   && !cpu_arch_flags.bitfield.cpuavx512vl)
6503                 {
6504                   if (i.tm.operand_types[op].bitfield.ymmword)
6505                     i.tm.operand_types[op].bitfield.xmmword = 0;
6506                   if (i.tm.operand_types[op].bitfield.zmmword)
6507                     i.tm.operand_types[op].bitfield.ymmword = 0;
6508                   if (!i.tm.opcode_modifier.evex
6509                       || i.tm.opcode_modifier.evex == EVEXDYN)
6510                     i.tm.opcode_modifier.evex = EVEX512;
6511                 }
6512
6513               if (i.tm.operand_types[op].bitfield.xmmword
6514                   + i.tm.operand_types[op].bitfield.ymmword
6515                   + i.tm.operand_types[op].bitfield.zmmword < 2)
6516                 continue;
6517
6518               /* Any properly sized operand disambiguates the insn.  */
6519               if (i.types[op].bitfield.xmmword
6520                   || i.types[op].bitfield.ymmword
6521                   || i.types[op].bitfield.zmmword)
6522                 {
6523                   suffixes &= ~(7 << 6);
6524                   evex = 0;
6525                   break;
6526                 }
6527
6528               if ((i.flags[op] & Operand_Mem)
6529                   && i.tm.operand_types[op].bitfield.unspecified)
6530                 {
6531                   if (i.tm.operand_types[op].bitfield.xmmword)
6532                     suffixes |= 1 << 6;
6533                   if (i.tm.operand_types[op].bitfield.ymmword)
6534                     suffixes |= 1 << 7;
6535                   if (i.tm.operand_types[op].bitfield.zmmword)
6536                     suffixes |= 1 << 8;
6537                   if (is_evex_encoding (&i.tm))
6538                     evex = EVEX512;
6539                 }
6540             }
6541         }
6542
6543       /* Are multiple suffixes / operand sizes allowed?  */
6544       if (suffixes & (suffixes - 1))
6545         {
6546           if (intel_syntax
6547               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
6548                   || operand_check == check_error))
6549             {
6550               as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
6551               return 0;
6552             }
6553           if (operand_check == check_error)
6554             {
6555               as_bad (_("no instruction mnemonic suffix given and "
6556                         "no register operands; can't size `%s'"), i.tm.name);
6557               return 0;
6558             }
6559           if (operand_check == check_warning)
6560             as_warn (_("%s; using default for `%s'"),
6561                        intel_syntax
6562                        ? _("ambiguous operand size")
6563                        : _("no instruction mnemonic suffix given and "
6564                            "no register operands"),
6565                        i.tm.name);
6566
6567           if (i.tm.opcode_modifier.floatmf)
6568             i.suffix = SHORT_MNEM_SUFFIX;
6569           else if ((i.tm.base_opcode | 8) == 0xfbe
6570                    || (i.tm.base_opcode == 0x63
6571                        && i.tm.cpu_flags.bitfield.cpu64))
6572             /* handled below */;
6573           else if (evex)
6574             i.tm.opcode_modifier.evex = evex;
6575           else if (flag_code == CODE_16BIT)
6576             i.suffix = WORD_MNEM_SUFFIX;
6577           else if (!i.tm.opcode_modifier.no_lsuf)
6578             i.suffix = LONG_MNEM_SUFFIX;
6579           else
6580             i.suffix = QWORD_MNEM_SUFFIX;
6581         }
6582     }
6583
6584   if ((i.tm.base_opcode | 8) == 0xfbe
6585       || (i.tm.base_opcode == 0x63 && i.tm.cpu_flags.bitfield.cpu64))
6586     {
6587       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
6588          In AT&T syntax, if there is no suffix (warned about above), the default
6589          will be byte extension.  */
6590       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
6591         i.tm.base_opcode |= 1;
6592
6593       /* For further processing, the suffix should represent the destination
6594          (register).  This is already the case when one was used with
6595          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
6596          no suffix to begin with.  */
6597       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
6598         {
6599           if (i.types[1].bitfield.word)
6600             i.suffix = WORD_MNEM_SUFFIX;
6601           else if (i.types[1].bitfield.qword)
6602             i.suffix = QWORD_MNEM_SUFFIX;
6603           else
6604             i.suffix = LONG_MNEM_SUFFIX;
6605
6606           i.tm.opcode_modifier.w = 0;
6607         }
6608     }
6609
6610   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
6611     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
6612                    != (i.tm.operand_types[1].bitfield.class == Reg);
6613
6614   /* Change the opcode based on the operand size given by i.suffix.  */
6615   switch (i.suffix)
6616     {
6617     /* Size floating point instruction.  */
6618     case LONG_MNEM_SUFFIX:
6619       if (i.tm.opcode_modifier.floatmf)
6620         {
6621           i.tm.base_opcode ^= 4;
6622           break;
6623         }
6624     /* fall through */
6625     case WORD_MNEM_SUFFIX:
6626     case QWORD_MNEM_SUFFIX:
6627       /* It's not a byte, select word/dword operation.  */
6628       if (i.tm.opcode_modifier.w)
6629         {
6630           if (i.short_form)
6631             i.tm.base_opcode |= 8;
6632           else
6633             i.tm.base_opcode |= 1;
6634         }
6635     /* fall through */
6636     case SHORT_MNEM_SUFFIX:
6637       /* Now select between word & dword operations via the operand
6638          size prefix, except for instructions that will ignore this
6639          prefix anyway.  */
6640       if (i.suffix != QWORD_MNEM_SUFFIX
6641           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
6642           && !i.tm.opcode_modifier.floatmf
6643           && !is_any_vex_encoding (&i.tm)
6644           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
6645               || (flag_code == CODE_64BIT
6646                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
6647         {
6648           unsigned int prefix = DATA_PREFIX_OPCODE;
6649
6650           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
6651             prefix = ADDR_PREFIX_OPCODE;
6652
6653           if (!add_prefix (prefix))
6654             return 0;
6655         }
6656
6657       /* Set mode64 for an operand.  */
6658       if (i.suffix == QWORD_MNEM_SUFFIX
6659           && flag_code == CODE_64BIT
6660           && !i.tm.opcode_modifier.norex64
6661           && !i.tm.opcode_modifier.vexw
6662           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
6663              need rex64. */
6664           && ! (i.operands == 2
6665                 && i.tm.base_opcode == 0x90
6666                 && i.tm.extension_opcode == None
6667                 && i.types[0].bitfield.instance == Accum
6668                 && i.types[0].bitfield.qword
6669                 && i.types[1].bitfield.instance == Accum
6670                 && i.types[1].bitfield.qword))
6671         i.rex |= REX_W;
6672
6673       break;
6674     }
6675
6676   if (i.tm.opcode_modifier.addrprefixopreg)
6677     {
6678       gas_assert (!i.suffix);
6679       gas_assert (i.reg_operands);
6680
6681       if (i.tm.operand_types[0].bitfield.instance == Accum
6682           || i.operands == 1)
6683         {
6684           /* The address size override prefix changes the size of the
6685              first operand.  */
6686           if (flag_code == CODE_64BIT
6687               && i.op[0].regs->reg_type.bitfield.word)
6688             {
6689               as_bad (_("16-bit addressing unavailable for `%s'"),
6690                       i.tm.name);
6691               return 0;
6692             }
6693
6694           if ((flag_code == CODE_32BIT
6695                ? i.op[0].regs->reg_type.bitfield.word
6696                : i.op[0].regs->reg_type.bitfield.dword)
6697               && !add_prefix (ADDR_PREFIX_OPCODE))
6698             return 0;
6699         }
6700       else
6701         {
6702           /* Check invalid register operand when the address size override
6703              prefix changes the size of register operands.  */
6704           unsigned int op;
6705           enum { need_word, need_dword, need_qword } need;
6706
6707           if (flag_code == CODE_32BIT)
6708             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
6709           else if (i.prefix[ADDR_PREFIX])
6710             need = need_dword;
6711           else
6712             need = flag_code == CODE_64BIT ? need_qword : need_word;
6713
6714           for (op = 0; op < i.operands; op++)
6715             {
6716               if (i.types[op].bitfield.class != Reg)
6717                 continue;
6718
6719               switch (need)
6720                 {
6721                 case need_word:
6722                   if (i.op[op].regs->reg_type.bitfield.word)
6723                     continue;
6724                   break;
6725                 case need_dword:
6726                   if (i.op[op].regs->reg_type.bitfield.dword)
6727                     continue;
6728                   break;
6729                 case need_qword:
6730                   if (i.op[op].regs->reg_type.bitfield.qword)
6731                     continue;
6732                   break;
6733                 }
6734
6735               as_bad (_("invalid register operand size for `%s'"),
6736                       i.tm.name);
6737               return 0;
6738             }
6739         }
6740     }
6741
6742   return 1;
6743 }
6744
6745 static int
6746 check_byte_reg (void)
6747 {
6748   int op;
6749
6750   for (op = i.operands; --op >= 0;)
6751     {
6752       /* Skip non-register operands. */
6753       if (i.types[op].bitfield.class != Reg)
6754         continue;
6755
6756       /* If this is an eight bit register, it's OK.  If it's the 16 or
6757          32 bit version of an eight bit register, we will just use the
6758          low portion, and that's OK too.  */
6759       if (i.types[op].bitfield.byte)
6760         continue;
6761
6762       /* I/O port address operands are OK too.  */
6763       if (i.tm.operand_types[op].bitfield.instance == RegD
6764           && i.tm.operand_types[op].bitfield.word)
6765         continue;
6766
6767       /* crc32 only wants its source operand checked here.  */
6768       if (i.tm.base_opcode == 0xf20f38f0 && op)
6769         continue;
6770
6771       /* Any other register is bad.  */
6772       if (i.types[op].bitfield.class == Reg
6773           || i.types[op].bitfield.class == RegMMX
6774           || i.types[op].bitfield.class == RegSIMD
6775           || i.types[op].bitfield.class == SReg
6776           || i.types[op].bitfield.class == RegCR
6777           || i.types[op].bitfield.class == RegDR
6778           || i.types[op].bitfield.class == RegTR)
6779         {
6780           as_bad (_("`%s%s' not allowed with `%s%c'"),
6781                   register_prefix,
6782                   i.op[op].regs->reg_name,
6783                   i.tm.name,
6784                   i.suffix);
6785           return 0;
6786         }
6787     }
6788   return 1;
6789 }
6790
6791 static int
6792 check_long_reg (void)
6793 {
6794   int op;
6795
6796   for (op = i.operands; --op >= 0;)
6797     /* Skip non-register operands. */
6798     if (i.types[op].bitfield.class != Reg)
6799       continue;
6800     /* Reject eight bit registers, except where the template requires
6801        them. (eg. movzb)  */
6802     else if (i.types[op].bitfield.byte
6803              && (i.tm.operand_types[op].bitfield.class == Reg
6804                  || i.tm.operand_types[op].bitfield.instance == Accum)
6805              && (i.tm.operand_types[op].bitfield.word
6806                  || i.tm.operand_types[op].bitfield.dword))
6807       {
6808         as_bad (_("`%s%s' not allowed with `%s%c'"),
6809                 register_prefix,
6810                 i.op[op].regs->reg_name,
6811                 i.tm.name,
6812                 i.suffix);
6813         return 0;
6814       }
6815     /* Error if the e prefix on a general reg is missing.  */
6816     else if (i.types[op].bitfield.word
6817              && (i.tm.operand_types[op].bitfield.class == Reg
6818                  || i.tm.operand_types[op].bitfield.instance == Accum)
6819              && i.tm.operand_types[op].bitfield.dword)
6820       {
6821         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6822                 register_prefix, i.op[op].regs->reg_name,
6823                 i.suffix);
6824         return 0;
6825       }
6826     /* Warn if the r prefix on a general reg is present.  */
6827     else if (i.types[op].bitfield.qword
6828              && (i.tm.operand_types[op].bitfield.class == Reg
6829                  || i.tm.operand_types[op].bitfield.instance == Accum)
6830              && i.tm.operand_types[op].bitfield.dword)
6831       {
6832         if (intel_syntax
6833             && i.tm.opcode_modifier.toqword
6834             && i.types[0].bitfield.class != RegSIMD)
6835           {
6836             /* Convert to QWORD.  We want REX byte. */
6837             i.suffix = QWORD_MNEM_SUFFIX;
6838           }
6839         else
6840           {
6841             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6842                     register_prefix, i.op[op].regs->reg_name,
6843                     i.suffix);
6844             return 0;
6845           }
6846       }
6847   return 1;
6848 }
6849
6850 static int
6851 check_qword_reg (void)
6852 {
6853   int op;
6854
6855   for (op = i.operands; --op >= 0; )
6856     /* Skip non-register operands. */
6857     if (i.types[op].bitfield.class != Reg)
6858       continue;
6859     /* Reject eight bit registers, except where the template requires
6860        them. (eg. movzb)  */
6861     else if (i.types[op].bitfield.byte
6862              && (i.tm.operand_types[op].bitfield.class == Reg
6863                  || i.tm.operand_types[op].bitfield.instance == Accum)
6864              && (i.tm.operand_types[op].bitfield.word
6865                  || i.tm.operand_types[op].bitfield.dword))
6866       {
6867         as_bad (_("`%s%s' not allowed with `%s%c'"),
6868                 register_prefix,
6869                 i.op[op].regs->reg_name,
6870                 i.tm.name,
6871                 i.suffix);
6872         return 0;
6873       }
6874     /* Warn if the r prefix on a general reg is missing.  */
6875     else if ((i.types[op].bitfield.word
6876               || i.types[op].bitfield.dword)
6877              && (i.tm.operand_types[op].bitfield.class == Reg
6878                  || i.tm.operand_types[op].bitfield.instance == Accum)
6879              && i.tm.operand_types[op].bitfield.qword)
6880       {
6881         /* Prohibit these changes in the 64bit mode, since the
6882            lowering is more complicated.  */
6883         if (intel_syntax
6884             && i.tm.opcode_modifier.todword
6885             && i.types[0].bitfield.class != RegSIMD)
6886           {
6887             /* Convert to DWORD.  We don't want REX byte. */
6888             i.suffix = LONG_MNEM_SUFFIX;
6889           }
6890         else
6891           {
6892             as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6893                     register_prefix, i.op[op].regs->reg_name,
6894                     i.suffix);
6895             return 0;
6896           }
6897       }
6898   return 1;
6899 }
6900
6901 static int
6902 check_word_reg (void)
6903 {
6904   int op;
6905   for (op = i.operands; --op >= 0;)
6906     /* Skip non-register operands. */
6907     if (i.types[op].bitfield.class != Reg)
6908       continue;
6909     /* Reject eight bit registers, except where the template requires
6910        them. (eg. movzb)  */
6911     else if (i.types[op].bitfield.byte
6912              && (i.tm.operand_types[op].bitfield.class == Reg
6913                  || i.tm.operand_types[op].bitfield.instance == Accum)
6914              && (i.tm.operand_types[op].bitfield.word
6915                  || i.tm.operand_types[op].bitfield.dword))
6916       {
6917         as_bad (_("`%s%s' not allowed with `%s%c'"),
6918                 register_prefix,
6919                 i.op[op].regs->reg_name,
6920                 i.tm.name,
6921                 i.suffix);
6922         return 0;
6923       }
6924     /* Error if the e or r prefix on a general reg is present.  */
6925     else if ((i.types[op].bitfield.dword
6926                  || i.types[op].bitfield.qword)
6927              && (i.tm.operand_types[op].bitfield.class == Reg
6928                  || i.tm.operand_types[op].bitfield.instance == Accum)
6929              && i.tm.operand_types[op].bitfield.word)
6930       {
6931         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6932                 register_prefix, i.op[op].regs->reg_name,
6933                 i.suffix);
6934         return 0;
6935       }
6936   return 1;
6937 }
6938
6939 static int
6940 update_imm (unsigned int j)
6941 {
6942   i386_operand_type overlap = i.types[j];
6943   if ((overlap.bitfield.imm8
6944        || overlap.bitfield.imm8s
6945        || overlap.bitfield.imm16
6946        || overlap.bitfield.imm32
6947        || overlap.bitfield.imm32s
6948        || overlap.bitfield.imm64)
6949       && !operand_type_equal (&overlap, &imm8)
6950       && !operand_type_equal (&overlap, &imm8s)
6951       && !operand_type_equal (&overlap, &imm16)
6952       && !operand_type_equal (&overlap, &imm32)
6953       && !operand_type_equal (&overlap, &imm32s)
6954       && !operand_type_equal (&overlap, &imm64))
6955     {
6956       if (i.suffix)
6957         {
6958           i386_operand_type temp;
6959
6960           operand_type_set (&temp, 0);
6961           if (i.suffix == BYTE_MNEM_SUFFIX)
6962             {
6963               temp.bitfield.imm8 = overlap.bitfield.imm8;
6964               temp.bitfield.imm8s = overlap.bitfield.imm8s;
6965             }
6966           else if (i.suffix == WORD_MNEM_SUFFIX)
6967             temp.bitfield.imm16 = overlap.bitfield.imm16;
6968           else if (i.suffix == QWORD_MNEM_SUFFIX)
6969             {
6970               temp.bitfield.imm64 = overlap.bitfield.imm64;
6971               temp.bitfield.imm32s = overlap.bitfield.imm32s;
6972             }
6973           else
6974             temp.bitfield.imm32 = overlap.bitfield.imm32;
6975           overlap = temp;
6976         }
6977       else if (operand_type_equal (&overlap, &imm16_32_32s)
6978                || operand_type_equal (&overlap, &imm16_32)
6979                || operand_type_equal (&overlap, &imm16_32s))
6980         {
6981           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6982             overlap = imm16;
6983           else
6984             overlap = imm32s;
6985         }
6986       if (!operand_type_equal (&overlap, &imm8)
6987           && !operand_type_equal (&overlap, &imm8s)
6988           && !operand_type_equal (&overlap, &imm16)
6989           && !operand_type_equal (&overlap, &imm32)
6990           && !operand_type_equal (&overlap, &imm32s)
6991           && !operand_type_equal (&overlap, &imm64))
6992         {
6993           as_bad (_("no instruction mnemonic suffix given; "
6994                     "can't determine immediate size"));
6995           return 0;
6996         }
6997     }
6998   i.types[j] = overlap;
6999
7000   return 1;
7001 }
7002
7003 static int
7004 finalize_imm (void)
7005 {
7006   unsigned int j, n;
7007
7008   /* Update the first 2 immediate operands.  */
7009   n = i.operands > 2 ? 2 : i.operands;
7010   if (n)
7011     {
7012       for (j = 0; j < n; j++)
7013         if (update_imm (j) == 0)
7014           return 0;
7015
7016       /* The 3rd operand can't be immediate operand.  */
7017       gas_assert (operand_type_check (i.types[2], imm) == 0);
7018     }
7019
7020   return 1;
7021 }
7022
7023 static int
7024 process_operands (void)
7025 {
       /* Adjust the operand arrays in `i' as required by the opcode
          modifiers in i.tm, then either build the ModRM byte or merge a
          register number into the opcode, and finally add a segment
          override prefix when one is still needed.  Returns 1 on
          success, and 0 after as_bad was called or add_prefix failed.  */

7026   /* Default segment register this instruction will use for memory
7027      accesses.  0 means unknown.  This is only for optimizing out
7028      unnecessary segment overrides.  */
7029   const seg_entry *default_seg = 0;
7030
       /* Templates flagged sse2avx with a non-zero vexvvvv modifier: on
          every path through this branch the destination operand ends up
          present twice in the operand arrays.  */
7031   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7032     {
7033       unsigned int dupl = i.operands;
7034       unsigned int dest = dupl - 1;
7035       unsigned int j;
7036
7037       /* The destination must be an xmm register.  */
7038       gas_assert (i.reg_operands
7039                   && MAX_OPERANDS > dupl
7040                   && operand_type_equal (&i.types[dest], &regxmm));
7041
7042       if (i.tm.operand_types[0].bitfield.instance == Accum
7043           && i.tm.operand_types[0].bitfield.xmmword)
7044         {
7045           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7046             {
7047               /* Keep xmm0 for instructions with VEX prefix and 3
7048                  sources.  */
7049               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7050               i.tm.operand_types[0].bitfield.class = RegSIMD;
7051               goto duplicate;
7052             }
7053           else
7054             {
7055               /* We remove the first xmm0 and keep the number of
7056                  operands unchanged, which in fact duplicates the
7057                  destination.  */
7058               for (j = 1; j < i.operands; j++)
7059                 {
7060                   i.op[j - 1] = i.op[j];
7061                   i.types[j - 1] = i.types[j];
7062                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7063                   i.flags[j - 1] = i.flags[j];
7064                 }
7065             }
7066         }
7067       else if (i.tm.opcode_modifier.implicit1stxmm0)
7068         {
7069           gas_assert ((MAX_OPERANDS - 1) > dupl
7070                       && (i.tm.opcode_modifier.vexsources
7071                           == VEX3SOURCES));
7072
7073           /* Add the implicit xmm0 for instructions with VEX prefix
7074              and 3 sources.  */
7075           for (j = i.operands; j > 0; j--)
7076             {
7077               i.op[j] = i.op[j - 1];
7078               i.types[j] = i.types[j - 1];
7079               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7080               i.flags[j] = i.flags[j - 1];
7081             }
7082           i.op[0].regs
7083             = (const reg_entry *) hash_find (reg_hash, "xmm0");
7084           i.types[0] = regxmm;
7085           i.tm.operand_types[0] = regxmm;
               /* NOTE(review): i.flags[0] is not assigned here, so it keeps
                  the value the previously-first operand had -- confirm that
                  this is intended.  */
7086
               /* Two operands are added: the xmm0 inserted at index 0 above
                  and the copy of the destination made below.  */
7087           i.operands += 2;
7088           i.reg_operands += 2;
7089           i.tm.operands += 2;
7090
               /* Both indices move up by one because every operand was
                  shifted to make room for xmm0.  */
7091           dupl++;
7092           dest++;
7093           i.op[dupl] = i.op[dest];
7094           i.types[dupl] = i.types[dest];
7095           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7096           i.flags[dupl] = i.flags[dest];
7097         }
7098       else
7099         {
               /* Also entered by goto from the VEX3SOURCES case above:
                  append a copy of the destination operand at index DUPL.  */
7100         duplicate:
7101           i.operands++;
7102           i.reg_operands++;
7103           i.tm.operands++;
7104
7105           i.op[dupl] = i.op[dest];
7106           i.types[dupl] = i.types[dest];
7107           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7108           i.flags[dupl] = i.flags[dest];
7109         }
7110
7111        if (i.tm.opcode_modifier.immext)
7112          process_immext ();
7113     }
7114   else if (i.tm.operand_types[0].bitfield.instance == Accum
7115            && i.tm.operand_types[0].bitfield.xmmword)
7116     {
           /* The template's first operand is an xmmword accumulator: drop
              it by moving all following operands down one slot and
              decrementing the operand counts.  */
7117       unsigned int j;
7118
7119       for (j = 1; j < i.operands; j++)
7120         {
7121           i.op[j - 1] = i.op[j];
7122           i.types[j - 1] = i.types[j];
7123
7124           /* We need to adjust fields in i.tm since they are used by
7125              build_modrm_byte.  */
7126           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7127
7128           i.flags[j - 1] = i.flags[j];
7129         }
7130
7131       i.operands--;
7132       i.reg_operands--;
7133       i.tm.operands--;
7134     }
7135   else if (i.tm.opcode_modifier.implicitquadgroup)
7136     {
7137       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7138
7139       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7140       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7141       regnum = register_number (i.op[1].regs);
           /* Round the register number down to a multiple of 4 to get the
              first register of the group; the group spans 4 registers.  */
7142       first_reg_in_group = regnum & ~3;
7143       last_reg_in_group = first_reg_in_group + 3;
           /* Only a warning is issued in this branch; no operand is
              modified.  */
7144       if (regnum != first_reg_in_group)
7145         as_warn (_("source register `%s%s' implicitly denotes"
7146                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7147                  register_prefix, i.op[1].regs->reg_name,
7148                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7149                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7150                  i.tm.name);
7151     }
7152   else if (i.tm.opcode_modifier.regkludge)
7153     {
7154       /* The imul $imm, %reg instruction is converted into
7155          imul $imm, %reg, %reg, and the clr %reg instruction
7156          is converted into xor %reg, %reg.  */
7157
7158       unsigned int first_reg_op;
7159
           /* The single register operand is operand 0 if that one is a
              register, otherwise operand 1.  */
7160       if (operand_type_check (i.types[0], reg))
7161         first_reg_op = 0;
7162       else
7163         first_reg_op = 1;
7164       /* Pretend we saw the extra register operand.  */
7165       gas_assert (i.reg_operands == 1
7166                   && i.op[first_reg_op + 1].regs == 0);
7167       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7168       i.types[first_reg_op + 1] = i.types[first_reg_op];
7169       i.operands++;
7170       i.reg_operands++;
7171     }
7172
7173   if (i.tm.opcode_modifier.modrm)
7174     {
7175       /* The opcode is completed (modulo i.tm.extension_opcode which
7176          must be put into the modrm byte).  Now, we make the modrm and
7177          index base bytes based on all the info we've collected.  */
7178
7179       default_seg = build_modrm_byte ();
7180     }
7181   else if (i.types[0].bitfield.class == SReg)
7182     {
           /* No ModRM byte and operand 0 is a segment register.  First
              reject two combinations: outside 64-bit mode, POP_SEG_SHORT
              with register number 1; in 64-bit mode, an opcode that equals
              POP_SEG386_SHORT once its low bit is set, with a register
              number below 4.  */
7183       if (flag_code != CODE_64BIT
7184           ? i.tm.base_opcode == POP_SEG_SHORT
7185             && i.op[0].regs->reg_num == 1
7186           : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
7187             && i.op[0].regs->reg_num < 4)
7188         {
7189           as_bad (_("you can't `%s %s%s'"),
7190                   i.tm.name, register_prefix, i.op[0].regs->reg_name);
7191           return 0;
7192         }
           /* Register numbers above 3 with a one-byte opcode: toggle the
              bits in which POP_SEG_SHORT and POP_SEG386_SHORT differ and
              make the opcode two bytes long.  */
7193       if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 )
7194         {
7195           i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
7196           i.tm.opcode_length = 2;
7197         }
           /* The register number is merged into the opcode starting at
              bit 3.  */
7198       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7199     }
7200   else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
7201     {
           /* The comparison above ignores the two low opcode bits.  */
7202       default_seg = &ds;
7203     }
7204   else if (i.tm.opcode_modifier.isstring)
7205     {
7206       /* For the string instructions that allow a segment override
7207          on one of their operands, the default segment is ds.  */
7208       default_seg = &ds;
7209     }
7210   else if (i.short_form)
7211     {
7212       /* The register or float register operand is in operand
7213          0 or 1.  */
           /* OP is 0 when the template's first operand has class Reg,
              and 1 otherwise.  */
7214       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7215
7216       /* Register goes in low 3 bits of opcode.  */
7217       i.tm.base_opcode |= i.op[op].regs->reg_num;
7218       if ((i.op[op].regs->reg_flags & RegRex) != 0)
7219         i.rex |= REX_B;
7220       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7221         {
7222           /* Warn about some common errors, but press on regardless.
7223              The first case can be generated by gcc (<= 2.8.1).  */
7224           if (i.operands == 2)
7225             {
7226               /* Reversed arguments on faddp, fsubp, etc.  */
7227               as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7228                        register_prefix, i.op[!intel_syntax].regs->reg_name,
7229                        register_prefix, i.op[intel_syntax].regs->reg_name);
7230             }
7231           else
7232             {
7233               /* Extraneous `l' suffix on fp insn.  */
7234               as_warn (_("translating to `%s %s%s'"), i.tm.name,
7235                        register_prefix, i.op[0].regs->reg_name);
7236             }
7237         }
7238     }
7239
       /* A segment override (i.seg[0] or an already recorded segment
          prefix) on opcode 0x8d, when is_any_vex_encoding is false, is
          diagnosed unless warnings are quiet, and is removed when
          optimizing.  */
7240   if ((i.seg[0] || i.prefix[SEG_PREFIX])
7241       && i.tm.base_opcode == 0x8d /* lea */
7242       && !is_any_vex_encoding(&i.tm))
7243     {
7244       if (!quiet_warnings)
7245         as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7246       if (optimize)
7247         {
7248           i.seg[0] = NULL;
7249           i.prefix[SEG_PREFIX] = 0;
7250         }
7251     }
7252
7253   /* If a segment was explicitly specified, and the specified segment
7254      is neither the default nor the one already recorded from a prefix,
7255      use an opcode prefix to select it.  If we never figured out what
7256      the default segment is, then default_seg will be zero at this
7257      point, and the specified segment prefix will always be used.  */
7258   if (i.seg[0]
7259       && i.seg[0] != default_seg
7260       && i.seg[0]->seg_prefix != i.prefix[SEG_PREFIX])
7261     {
           /* A zero return from add_prefix makes this function fail.  */
7262       if (!add_prefix (i.seg[0]->seg_prefix))
7263         return 0;
7264     }
7265   return 1;
7266 }
7267
7268 static const seg_entry *
7269 build_modrm_byte (void)
7270 {
7271   const seg_entry *default_seg = 0;
7272   unsigned int source, dest;
7273   int vex_3_sources;
7274
7275   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
7276   if (vex_3_sources)
7277     {
7278       unsigned int nds, reg_slot;
7279       expressionS *exp;
7280
7281       dest = i.operands - 1;
7282       nds = dest - 1;
7283
7284       /* There are 2 kinds of instructions:
7285          1. 5 operands: 4 register operands or 3 register operands
7286          plus 1 memory operand plus one Imm4 operand, VexXDS, and
7287          VexW0 or VexW1.  The destination must be either XMM, YMM or
7288          ZMM register.
7289          2. 4 operands: 4 register operands or 3 register operands
7290          plus 1 memory operand, with VexXDS.  */
7291       gas_assert ((i.reg_operands == 4
7292                    || (i.reg_operands == 3 && i.mem_operands == 1))
7293                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
7294                   && i.tm.opcode_modifier.vexw
7295                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
7296
7297       /* If VexW1 is set, the first non-immediate operand is the source and
7298          the second non-immediate one is encoded in the immediate operand.  */
7299       if (i.tm.opcode_modifier.vexw == VEXW1)
7300         {
7301           source = i.imm_operands;
7302           reg_slot = i.imm_operands + 1;
7303         }
7304       else
7305         {
7306           source = i.imm_operands + 1;
7307           reg_slot = i.imm_operands;
7308         }
7309
7310       if (i.imm_operands == 0)
7311         {
7312           /* When there is no immediate operand, generate an 8bit
7313              immediate operand to encode the first operand.  */
7314           exp = &im_expressions[i.imm_operands++];
7315           i.op[i.operands].imms = exp;
7316           i.types[i.operands] = imm8;
7317           i.operands++;
7318
7319           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7320           exp->X_op = O_constant;
7321           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
7322           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7323         }
7324       else
7325         {
7326           gas_assert (i.imm_operands == 1);
7327           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
7328           gas_assert (!i.tm.opcode_modifier.immext);
7329
7330           /* Turn on Imm8 again so that output_imm will generate it.  */
7331           i.types[0].bitfield.imm8 = 1;
7332
7333           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7334           i.op[0].imms->X_add_number
7335               |= register_number (i.op[reg_slot].regs) << 4;
7336           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7337         }
7338
7339       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
7340       i.vex.register_specifier = i.op[nds].regs;
7341     }
7342   else
7343     source = dest = 0;
7344
7345   /* i.reg_operands MUST be the number of real register operands;
7346      implicit registers do not count.  If there are 3 register
7347      operands, it must be a instruction with VexNDS.  For a
7348      instruction with VexNDD, the destination register is encoded
7349      in VEX prefix.  If there are 4 register operands, it must be
7350      a instruction with VEX prefix and 3 sources.  */
7351   if (i.mem_operands == 0
7352       && ((i.reg_operands == 2
7353            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
7354           || (i.reg_operands == 3
7355               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
7356           || (i.reg_operands == 4 && vex_3_sources)))
7357     {
7358       switch (i.operands)
7359         {
7360         case 2:
7361           source = 0;
7362           break;
7363         case 3:
7364           /* When there are 3 operands, one of them may be immediate,
7365              which may be the first or the last operand.  Otherwise,
7366              the first operand must be shift count register (cl) or it
7367              is an instruction with VexNDS. */
7368           gas_assert (i.imm_operands == 1
7369                       || (i.imm_operands == 0
7370                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
7371                               || (i.types[0].bitfield.instance == RegC
7372                                   && i.types[0].bitfield.byte))));
7373           if (operand_type_check (i.types[0], imm)
7374               || (i.types[0].bitfield.instance == RegC
7375                   && i.types[0].bitfield.byte))
7376             source = 1;
7377           else
7378             source = 0;
7379           break;
7380         case 4:
7381           /* When there are 4 operands, the first two must be 8bit
7382              immediate operands. The source operand will be the 3rd
7383              one.
7384
7385              For instructions with VexNDS, if the first operand
7386              an imm8, the source operand is the 2nd one.  If the last
7387              operand is imm8, the source operand is the first one.  */
7388           gas_assert ((i.imm_operands == 2
7389                        && i.types[0].bitfield.imm8
7390                        && i.types[1].bitfield.imm8)
7391                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
7392                           && i.imm_operands == 1
7393                           && (i.types[0].bitfield.imm8
7394                               || i.types[i.operands - 1].bitfield.imm8
7395                               || i.rounding)));
7396           if (i.imm_operands == 2)
7397             source = 2;
7398           else
7399             {
7400               if (i.types[0].bitfield.imm8)
7401                 source = 1;
7402               else
7403                 source = 0;
7404             }
7405           break;
7406         case 5:
7407           if (is_evex_encoding (&i.tm))
7408             {
7409               /* For EVEX instructions, when there are 5 operands, the
7410                  first one must be immediate operand.  If the second one
7411                  is immediate operand, the source operand is the 3th
7412                  one.  If the last one is immediate operand, the source
7413                  operand is the 2nd one.  */
7414               gas_assert (i.imm_operands == 2
7415                           && i.tm.opcode_modifier.sae
7416                           && operand_type_check (i.types[0], imm));
7417               if (operand_type_check (i.types[1], imm))
7418                 source = 2;
7419               else if (operand_type_check (i.types[4], imm))
7420                 source = 1;
7421               else
7422                 abort ();
7423             }
7424           break;
7425         default:
7426           abort ();
7427         }
7428
7429       if (!vex_3_sources)
7430         {
7431           dest = source + 1;
7432
7433           /* RC/SAE operand could be between DEST and SRC.  That happens
7434              when one operand is GPR and the other one is XMM/YMM/ZMM
7435              register.  */
7436           if (i.rounding && i.rounding->operand == (int) dest)
7437             dest++;
7438
7439           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7440             {
7441               /* For instructions with VexNDS, the register-only source
7442                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
7443                  register.  It is encoded in VEX prefix.  */
7444
7445               i386_operand_type op;
7446               unsigned int vvvv;
7447
7448               /* Check register-only source operand when two source
7449                  operands are swapped.  */
7450               if (!i.tm.operand_types[source].bitfield.baseindex
7451                   && i.tm.operand_types[dest].bitfield.baseindex)
7452                 {
7453                   vvvv = source;
7454                   source = dest;
7455                 }
7456               else
7457                 vvvv = dest;
7458
7459               op = i.tm.operand_types[vvvv];
7460               if ((dest + 1) >= i.operands
7461                   || ((op.bitfield.class != Reg
7462                        || (!op.bitfield.dword && !op.bitfield.qword))
7463                       && op.bitfield.class != RegSIMD
7464                       && !operand_type_equal (&op, &regmask)))
7465                 abort ();
7466               i.vex.register_specifier = i.op[vvvv].regs;
7467               dest++;
7468             }
7469         }
7470
7471       i.rm.mode = 3;
7472       /* One of the register operands will be encoded in the i.rm.reg
7473          field, the other in the combined i.rm.mode and i.rm.regmem
7474          fields.  If no form of this instruction supports a memory
7475          destination operand, then we assume the source operand may
7476          sometimes be a memory operand and so we need to store the
7477          destination in the i.rm.reg field.  */
7478       if (!i.tm.opcode_modifier.regmem
7479           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
7480         {
7481           i.rm.reg = i.op[dest].regs->reg_num;
7482           i.rm.regmem = i.op[source].regs->reg_num;
7483           if (i.op[dest].regs->reg_type.bitfield.class == RegMMX
7484                || i.op[source].regs->reg_type.bitfield.class == RegMMX)
7485             i.has_regmmx = TRUE;
7486           else if (i.op[dest].regs->reg_type.bitfield.class == RegSIMD
7487                    || i.op[source].regs->reg_type.bitfield.class == RegSIMD)
7488             {
7489               if (i.types[dest].bitfield.zmmword
7490                   || i.types[source].bitfield.zmmword)
7491                 i.has_regzmm = TRUE;
7492               else if (i.types[dest].bitfield.ymmword
7493                        || i.types[source].bitfield.ymmword)
7494                 i.has_regymm = TRUE;
7495               else
7496                 i.has_regxmm = TRUE;
7497             }
7498           if ((i.op[dest].regs->reg_flags & RegRex) != 0)
7499             i.rex |= REX_R;
7500           if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
7501             i.vrex |= REX_R;
7502           if ((i.op[source].regs->reg_flags & RegRex) != 0)
7503             i.rex |= REX_B;
7504           if ((i.op[source].regs->reg_flags & RegVRex) != 0)
7505             i.vrex |= REX_B;
7506         }
7507       else
7508         {
7509           i.rm.reg = i.op[source].regs->reg_num;
7510           i.rm.regmem = i.op[dest].regs->reg_num;
7511           if ((i.op[dest].regs->reg_flags & RegRex) != 0)
7512             i.rex |= REX_B;
7513           if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
7514             i.vrex |= REX_B;
7515           if ((i.op[source].regs->reg_flags & RegRex) != 0)
7516             i.rex |= REX_R;
7517           if ((i.op[source].regs->reg_flags & RegVRex) != 0)
7518             i.vrex |= REX_R;
7519         }
7520       if (flag_code != CODE_64BIT && (i.rex & REX_R))
7521         {
7522           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
7523             abort ();
7524           i.rex &= ~REX_R;
7525           add_prefix (LOCK_PREFIX_OPCODE);
7526         }
7527     }
7528   else
7529     {                   /* If it's not 2 reg operands...  */
7530       unsigned int mem;
7531
7532       if (i.mem_operands)
7533         {
7534           unsigned int fake_zero_displacement = 0;
7535           unsigned int op;
7536
7537           for (op = 0; op < i.operands; op++)
7538             if (i.flags[op] & Operand_Mem)
7539               break;
7540           gas_assert (op < i.operands);
7541
7542           if (i.tm.opcode_modifier.vecsib)
7543             {
7544               if (i.index_reg->reg_num == RegIZ)
7545                 abort ();
7546
7547               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7548               if (!i.base_reg)
7549                 {
7550                   i.sib.base = NO_BASE_REGISTER;
7551                   i.sib.scale = i.log2_scale_factor;
7552                   i.types[op].bitfield.disp8 = 0;
7553                   i.types[op].bitfield.disp16 = 0;
7554                   i.types[op].bitfield.disp64 = 0;
7555                   if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
7556                     {
7557                       /* Must be 32 bit */
7558                       i.types[op].bitfield.disp32 = 1;
7559                       i.types[op].bitfield.disp32s = 0;
7560                     }
7561                   else
7562                     {
7563                       i.types[op].bitfield.disp32 = 0;
7564                       i.types[op].bitfield.disp32s = 1;
7565                     }
7566                 }
7567               i.sib.index = i.index_reg->reg_num;
7568               if ((i.index_reg->reg_flags & RegRex) != 0)
7569                 i.rex |= REX_X;
7570               if ((i.index_reg->reg_flags & RegVRex) != 0)
7571                 i.vrex |= REX_X;
7572             }
7573
7574           default_seg = &ds;
7575
7576           if (i.base_reg == 0)
7577             {
7578               i.rm.mode = 0;
7579               if (!i.disp_operands)
7580                 fake_zero_displacement = 1;
7581               if (i.index_reg == 0)
7582                 {
7583                   i386_operand_type newdisp;
7584
7585                   gas_assert (!i.tm.opcode_modifier.vecsib);
7586                   /* Operand is just <disp>  */
7587                   if (flag_code == CODE_64BIT)
7588                     {
7589                       /* 64bit mode overwrites the 32bit absolute
7590                          addressing by RIP relative addressing and
7591                          absolute addressing is encoded by one of the
7592                          redundant SIB forms.  */
7593                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7594                       i.sib.base = NO_BASE_REGISTER;
7595                       i.sib.index = NO_INDEX_REGISTER;
7596                       newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
7597                     }
7598                   else if ((flag_code == CODE_16BIT)
7599                            ^ (i.prefix[ADDR_PREFIX] != 0))
7600                     {
7601                       i.rm.regmem = NO_BASE_REGISTER_16;
7602                       newdisp = disp16;
7603                     }
7604                   else
7605                     {
7606                       i.rm.regmem = NO_BASE_REGISTER;
7607                       newdisp = disp32;
7608                     }
7609                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
7610                   i.types[op] = operand_type_or (i.types[op], newdisp);
7611                 }
7612               else if (!i.tm.opcode_modifier.vecsib)
7613                 {
7614                   /* !i.base_reg && i.index_reg  */
7615                   if (i.index_reg->reg_num == RegIZ)
7616                     i.sib.index = NO_INDEX_REGISTER;
7617                   else
7618                     i.sib.index = i.index_reg->reg_num;
7619                   i.sib.base = NO_BASE_REGISTER;
7620                   i.sib.scale = i.log2_scale_factor;
7621                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7622                   i.types[op].bitfield.disp8 = 0;
7623                   i.types[op].bitfield.disp16 = 0;
7624                   i.types[op].bitfield.disp64 = 0;
7625                   if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
7626                     {
7627                       /* Must be 32 bit */
7628                       i.types[op].bitfield.disp32 = 1;
7629                       i.types[op].bitfield.disp32s = 0;
7630                     }
7631                   else
7632                     {
7633                       i.types[op].bitfield.disp32 = 0;
7634                       i.types[op].bitfield.disp32s = 1;
7635                     }
7636                   if ((i.index_reg->reg_flags & RegRex) != 0)
7637                     i.rex |= REX_X;
7638                 }
7639             }
7640           /* RIP addressing for 64bit mode.  */
7641           else if (i.base_reg->reg_num == RegIP)
7642             {
7643               gas_assert (!i.tm.opcode_modifier.vecsib);
7644               i.rm.regmem = NO_BASE_REGISTER;
7645               i.types[op].bitfield.disp8 = 0;
7646               i.types[op].bitfield.disp16 = 0;
7647               i.types[op].bitfield.disp32 = 0;
7648               i.types[op].bitfield.disp32s = 1;
7649               i.types[op].bitfield.disp64 = 0;
7650               i.flags[op] |= Operand_PCrel;
7651               if (! i.disp_operands)
7652                 fake_zero_displacement = 1;
7653             }
7654           else if (i.base_reg->reg_type.bitfield.word)
7655             {
7656               gas_assert (!i.tm.opcode_modifier.vecsib);
7657               switch (i.base_reg->reg_num)
7658                 {
7659                 case 3: /* (%bx)  */
7660                   if (i.index_reg == 0)
7661                     i.rm.regmem = 7;
7662                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
7663                     i.rm.regmem = i.index_reg->reg_num - 6;
7664                   break;
7665                 case 5: /* (%bp)  */
7666                   default_seg = &ss;
7667                   if (i.index_reg == 0)
7668                     {
7669                       i.rm.regmem = 6;
7670                       if (operand_type_check (i.types[op], disp) == 0)
7671                         {
7672                           /* fake (%bp) into 0(%bp)  */
7673                           i.types[op].bitfield.disp8 = 1;
7674                           fake_zero_displacement = 1;
7675                         }
7676                     }
7677                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
7678                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
7679                   break;
7680                 default: /* (%si) -> 4 or (%di) -> 5  */
7681                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
7682                 }
7683               i.rm.mode = mode_from_disp_size (i.types[op]);
7684             }
7685           else /* i.base_reg and 32/64 bit mode  */
7686             {
7687               if (flag_code == CODE_64BIT
7688                   && operand_type_check (i.types[op], disp))
7689                 {
7690                   i.types[op].bitfield.disp16 = 0;
7691                   i.types[op].bitfield.disp64 = 0;
7692                   if (i.prefix[ADDR_PREFIX] == 0)
7693                     {
7694                       i.types[op].bitfield.disp32 = 0;
7695                       i.types[op].bitfield.disp32s = 1;
7696                     }
7697                   else
7698                     {
7699                       i.types[op].bitfield.disp32 = 1;
7700                       i.types[op].bitfield.disp32s = 0;
7701                     }
7702                 }
7703
7704               if (!i.tm.opcode_modifier.vecsib)
7705                 i.rm.regmem = i.base_reg->reg_num;
7706               if ((i.base_reg->reg_flags & RegRex) != 0)
7707                 i.rex |= REX_B;
7708               i.sib.base = i.base_reg->reg_num;
7709               /* x86-64 ignores REX prefix bit here to avoid decoder
7710                  complications.  */
7711               if (!(i.base_reg->reg_flags & RegRex)
7712                   && (i.base_reg->reg_num == EBP_REG_NUM
7713                    || i.base_reg->reg_num == ESP_REG_NUM))
7714                   default_seg = &ss;
7715               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
7716                 {
7717                   fake_zero_displacement = 1;
7718                   i.types[op].bitfield.disp8 = 1;
7719                 }
7720               i.sib.scale = i.log2_scale_factor;
7721               if (i.index_reg == 0)
7722                 {
7723                   gas_assert (!i.tm.opcode_modifier.vecsib);
7724                   /* <disp>(%esp) becomes two byte modrm with no index
7725                      register.  We've already stored the code for esp
7726                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
7727                      Any base register besides %esp will not use the
7728                      extra modrm byte.  */
7729                   i.sib.index = NO_INDEX_REGISTER;
7730                 }
7731               else if (!i.tm.opcode_modifier.vecsib)
7732                 {
7733                   if (i.index_reg->reg_num == RegIZ)
7734                     i.sib.index = NO_INDEX_REGISTER;
7735                   else
7736                     i.sib.index = i.index_reg->reg_num;
7737                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7738                   if ((i.index_reg->reg_flags & RegRex) != 0)
7739                     i.rex |= REX_X;
7740                 }
7741
7742               if (i.disp_operands
7743                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
7744                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
7745                 i.rm.mode = 0;
7746               else
7747                 {
7748                   if (!fake_zero_displacement
7749                       && !i.disp_operands
7750                       && i.disp_encoding)
7751                     {
7752                       fake_zero_displacement = 1;
7753                       if (i.disp_encoding == disp_encoding_8bit)
7754                         i.types[op].bitfield.disp8 = 1;
7755                       else
7756                         i.types[op].bitfield.disp32 = 1;
7757                     }
7758                   i.rm.mode = mode_from_disp_size (i.types[op]);
7759                 }
7760             }
7761
7762           if (fake_zero_displacement)
7763             {
7764               /* Fakes a zero displacement assuming that i.types[op]
7765                  holds the correct displacement size.  */
7766               expressionS *exp;
7767
7768               gas_assert (i.op[op].disps == 0);
7769               exp = &disp_expressions[i.disp_operands++];
7770               i.op[op].disps = exp;
7771               exp->X_op = O_constant;
7772               exp->X_add_number = 0;
7773               exp->X_add_symbol = (symbolS *) 0;
7774               exp->X_op_symbol = (symbolS *) 0;
7775             }
7776
7777           mem = op;
7778         }
7779       else
7780         mem = ~0;
7781
7782       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
7783         {
7784           if (operand_type_check (i.types[0], imm))
7785             i.vex.register_specifier = NULL;
7786           else
7787             {
7788               /* VEX.vvvv encodes one of the sources when the first
7789                  operand is not an immediate.  */
7790               if (i.tm.opcode_modifier.vexw == VEXW0)
7791                 i.vex.register_specifier = i.op[0].regs;
7792               else
7793                 i.vex.register_specifier = i.op[1].regs;
7794             }
7795
7796           /* Destination is a XMM register encoded in the ModRM.reg
7797              and VEX.R bit.  */
7798           i.rm.reg = i.op[2].regs->reg_num;
7799           if ((i.op[2].regs->reg_flags & RegRex) != 0)
7800             i.rex |= REX_R;
7801
7802           /* ModRM.rm and VEX.B encodes the other source.  */
7803           if (!i.mem_operands)
7804             {
7805               i.rm.mode = 3;
7806
7807               if (i.tm.opcode_modifier.vexw == VEXW0)
7808                 i.rm.regmem = i.op[1].regs->reg_num;
7809               else
7810                 i.rm.regmem = i.op[0].regs->reg_num;
7811
7812               if ((i.op[1].regs->reg_flags & RegRex) != 0)
7813                 i.rex |= REX_B;
7814             }
7815         }
7816       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
7817         {
7818           i.vex.register_specifier = i.op[2].regs;
7819           if (!i.mem_operands)
7820             {
7821               i.rm.mode = 3;
7822               i.rm.regmem = i.op[1].regs->reg_num;
7823               if ((i.op[1].regs->reg_flags & RegRex) != 0)
7824                 i.rex |= REX_B;
7825             }
7826         }
7827       /* Fill in i.rm.reg or i.rm.regmem field with register operand
7828          (if any) based on i.tm.extension_opcode.  Again, we must be
7829          careful to make sure that segment/control/debug/test/MMX
7830          registers are coded into the i.rm.reg field.  */
7831       else if (i.reg_operands)
7832         {
7833           unsigned int op;
7834           unsigned int vex_reg = ~0;
7835
7836           for (op = 0; op < i.operands; op++)
7837             {
7838               if (i.types[op].bitfield.class == Reg
7839                   || i.types[op].bitfield.class == RegBND
7840                   || i.types[op].bitfield.class == RegMask
7841                   || i.types[op].bitfield.class == SReg
7842                   || i.types[op].bitfield.class == RegCR
7843                   || i.types[op].bitfield.class == RegDR
7844                   || i.types[op].bitfield.class == RegTR)
7845                 break;
7846               if (i.types[op].bitfield.class == RegSIMD)
7847                 {
7848                   if (i.types[op].bitfield.zmmword)
7849                     i.has_regzmm = TRUE;
7850                   else if (i.types[op].bitfield.ymmword)
7851                     i.has_regymm = TRUE;
7852                   else
7853                     i.has_regxmm = TRUE;
7854                   break;
7855                 }
7856               if (i.types[op].bitfield.class == RegMMX)
7857                 {
7858                   i.has_regmmx = TRUE;
7859                   break;
7860                 }
7861             }
7862
7863           if (vex_3_sources)
7864             op = dest;
7865           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7866             {
7867               /* For instructions with VexNDS, the register-only
7868                  source operand is encoded in VEX prefix. */
7869               gas_assert (mem != (unsigned int) ~0);
7870
7871               if (op > mem)
7872                 {
7873                   vex_reg = op++;
7874                   gas_assert (op < i.operands);
7875                 }
7876               else
7877                 {
7878                   /* Check register-only source operand when two source
7879                      operands are swapped.  */
7880                   if (!i.tm.operand_types[op].bitfield.baseindex
7881                       && i.tm.operand_types[op + 1].bitfield.baseindex)
7882                     {
7883                       vex_reg = op;
7884                       op += 2;
7885                       gas_assert (mem == (vex_reg + 1)
7886                                   && op < i.operands);
7887                     }
7888                   else
7889                     {
7890                       vex_reg = op + 1;
7891                       gas_assert (vex_reg < i.operands);
7892                     }
7893                 }
7894             }
7895           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
7896             {
7897               /* For instructions with VexNDD, the register destination
7898                  is encoded in VEX prefix.  */
7899               if (i.mem_operands == 0)
7900                 {
7901                   /* There is no memory operand.  */
7902                   gas_assert ((op + 2) == i.operands);
7903                   vex_reg = op + 1;
7904                 }
7905               else
7906                 {
7907                   /* There are only 2 non-immediate operands.  */
7908                   gas_assert (op < i.imm_operands + 2
7909                               && i.operands == i.imm_operands + 2);
7910                   vex_reg = i.imm_operands + 1;
7911                 }
7912             }
7913           else
7914             gas_assert (op < i.operands);
7915
7916           if (vex_reg != (unsigned int) ~0)
7917             {
7918               i386_operand_type *type = &i.tm.operand_types[vex_reg];
7919
7920               if ((type->bitfield.class != Reg
7921                    || (!type->bitfield.dword && !type->bitfield.qword))
7922                   && type->bitfield.class != RegSIMD
7923                   && !operand_type_equal (type, &regmask))
7924                 abort ();
7925
7926               i.vex.register_specifier = i.op[vex_reg].regs;
7927             }
7928
7929           /* Don't set OP operand twice.  */
7930           if (vex_reg != op)
7931             {
7932               /* If there is an extension opcode to put here, the
7933                  register number must be put into the regmem field.  */
7934               if (i.tm.extension_opcode != None)
7935                 {
7936                   i.rm.regmem = i.op[op].regs->reg_num;
7937                   if ((i.op[op].regs->reg_flags & RegRex) != 0)
7938                     i.rex |= REX_B;
7939                   if ((i.op[op].regs->reg_flags & RegVRex) != 0)
7940                     i.vrex |= REX_B;
7941                 }
7942               else
7943                 {
7944                   i.rm.reg = i.op[op].regs->reg_num;
7945                   if ((i.op[op].regs->reg_flags & RegRex) != 0)
7946                     i.rex |= REX_R;
7947                   if ((i.op[op].regs->reg_flags & RegVRex) != 0)
7948                     i.vrex |= REX_R;
7949                 }
7950             }
7951
7952           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
7953              must set it to 3 to indicate this is a register operand
7954              in the regmem field.  */
7955           if (!i.mem_operands)
7956             i.rm.mode = 3;
7957         }
7958
7959       /* Fill in i.rm.reg field with extension opcode (if any).  */
7960       if (i.tm.extension_opcode != None)
7961         i.rm.reg = i.tm.extension_opcode;
7962     }
7963   return default_seg;
7964 }
7965
7966 static unsigned int
7967 flip_code16 (unsigned int code16)
7968 {
7969   gas_assert (i.tm.operands == 1);
7970
7971   return !(i.prefix[REX_PREFIX] & REX_W)
7972          && (code16 ? i.tm.operand_types[0].bitfield.disp32
7973                       || i.tm.operand_types[0].bitfield.disp32s
7974                     : i.tm.operand_types[0].bitfield.disp16)
7975          ? CODE16 : 0;
7976 }
7977
7978 static void
7979 output_branch (void)
7980 {
7981   char *p;
7982   int size;
7983   int code16;
7984   int prefix;
7985   relax_substateT subtype;
7986   symbolS *sym;
7987   offsetT off;
7988
7989   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
7990   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
7991
7992   prefix = 0;
7993   if (i.prefix[DATA_PREFIX] != 0)
7994     {
7995       prefix = 1;
7996       i.prefixes -= 1;
7997       code16 ^= flip_code16(code16);
7998     }
7999   /* Pentium4 branch hints.  */
8000   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8001       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8002     {
8003       prefix++;
8004       i.prefixes--;
8005     }
8006   if (i.prefix[REX_PREFIX] != 0)
8007     {
8008       prefix++;
8009       i.prefixes--;
8010     }
8011
8012   /* BND prefixed jump.  */
8013   if (i.prefix[BND_PREFIX] != 0)
8014     {
8015       prefix++;
8016       i.prefixes--;
8017     }
8018
8019   if (i.prefixes != 0)
8020     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8021
8022   /* It's always a symbol;  End frag & setup for relax.
8023      Make sure there is enough room in this frag for the largest
8024      instruction we may generate in md_convert_frag.  This is 2
8025      bytes for the opcode and room for the prefix and largest
8026      displacement.  */
8027   frag_grow (prefix + 2 + 4);
8028   /* Prefix and 1 opcode byte go in fr_fix.  */
8029   p = frag_more (prefix + 1);
8030   if (i.prefix[DATA_PREFIX] != 0)
8031     *p++ = DATA_PREFIX_OPCODE;
8032   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8033       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8034     *p++ = i.prefix[SEG_PREFIX];
8035   if (i.prefix[BND_PREFIX] != 0)
8036     *p++ = BND_PREFIX_OPCODE;
8037   if (i.prefix[REX_PREFIX] != 0)
8038     *p++ = i.prefix[REX_PREFIX];
8039   *p = i.tm.base_opcode;
8040
8041   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8042     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8043   else if (cpu_arch_flags.bitfield.cpui386)
8044     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8045   else
8046     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8047   subtype |= code16;
8048
8049   sym = i.op[0].disps->X_add_symbol;
8050   off = i.op[0].disps->X_add_number;
8051
8052   if (i.op[0].disps->X_op != O_constant
8053       && i.op[0].disps->X_op != O_symbol)
8054     {
8055       /* Handle complex expressions.  */
8056       sym = make_expr_symbol (i.op[0].disps);
8057       off = 0;
8058     }
8059
8060   /* 1 possible extra opcode + 4 byte displacement go in var part.
8061      Pass reloc in fr_var.  */
8062   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8063 }
8064
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Decide whether a branch to symbol S should carry a PLT32
   relocation.  Returns TRUE if so, FALSE otherwise.  */

static bfd_boolean
need_plt32_p (symbolS *s)
{
  /* PLT32 only exists for ELF output.  */
  if (!IS_ELF)
    return FALSE;

#ifdef TE_SOLARIS
  /* Never emit PLT32 on Solaris: neither the native linker nor krtld
     supports it.  */
  return FALSE;
#endif

  /* x86-64 needs no preparation for a PLT branch, so R_X86_64_PLT32
     can stand in for R_X86_64_PC32 as a marker for 32-bit PC-relative
     branches.  Nothing of the sort is done for non-64-bit objects.  */
  if (!object_64bit)
    return FALSE;

  /* Weak and undefined symbols need PLT32.  Of the remaining defined
     symbols only the global ones do; local ones do not.  NB: Symbols
     with non-default visibility are treated like any other global
     symbol, so that PLT32 keeps working as a marker for 32-bit
     PC-relative branches, which is useful for linker relaxation.  */
  return S_IS_WEAK (s) || !S_IS_DEFINED (s) || S_IS_EXTERNAL (s);
}
#endif
8103
8104 static void
8105 output_jump (void)
8106 {
8107   char *p;
8108   int size;
8109   fixS *fixP;
8110   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8111
8112   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8113     {
8114       /* This is a loop or jecxz type instruction.  */
8115       size = 1;
8116       if (i.prefix[ADDR_PREFIX] != 0)
8117         {
8118           FRAG_APPEND_1_CHAR (ADDR_PREFIX_OPCODE);
8119           i.prefixes -= 1;
8120         }
8121       /* Pentium4 branch hints.  */
8122       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8123           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8124         {
8125           FRAG_APPEND_1_CHAR (i.prefix[SEG_PREFIX]);
8126           i.prefixes--;
8127         }
8128     }
8129   else
8130     {
8131       int code16;
8132
8133       code16 = 0;
8134       if (flag_code == CODE_16BIT)
8135         code16 = CODE16;
8136
8137       if (i.prefix[DATA_PREFIX] != 0)
8138         {
8139           FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE);
8140           i.prefixes -= 1;
8141           code16 ^= flip_code16(code16);
8142         }
8143
8144       size = 4;
8145       if (code16)
8146         size = 2;
8147     }
8148
8149   /* BND prefixed jump.  */
8150   if (i.prefix[BND_PREFIX] != 0)
8151     {
8152       FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
8153       i.prefixes -= 1;
8154     }
8155
8156   if (i.prefix[REX_PREFIX] != 0)
8157     {
8158       FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
8159       i.prefixes -= 1;
8160     }
8161
8162   if (i.prefixes != 0)
8163     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8164
8165   p = frag_more (i.tm.opcode_length + size);
8166   switch (i.tm.opcode_length)
8167     {
8168     case 2:
8169       *p++ = i.tm.base_opcode >> 8;
8170       /* Fall through.  */
8171     case 1:
8172       *p++ = i.tm.base_opcode;
8173       break;
8174     default:
8175       abort ();
8176     }
8177
8178 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8179   if (size == 4
8180       && jump_reloc == NO_RELOC
8181       && need_plt32_p (i.op[0].disps->X_add_symbol))
8182     jump_reloc = BFD_RELOC_X86_64_PLT32;
8183 #endif
8184
8185   jump_reloc = reloc (size, 1, 1, jump_reloc);
8186
8187   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8188                       i.op[0].disps, 1, jump_reloc);
8189
8190   /* All jumps handled here are signed, but don't use a signed limit
8191      check for 32 and 16 bit jumps as we want to allow wrap around at
8192      4G and 64k respectively.  */
8193   if (size == 1)
8194     fixP->fx_signed = 1;
8195 }
8196
8197 static void
8198 output_interseg_jump (void)
8199 {
8200   char *p;
8201   int size;
8202   int prefix;
8203   int code16;
8204
8205   code16 = 0;
8206   if (flag_code == CODE_16BIT)
8207     code16 = CODE16;
8208
8209   prefix = 0;
8210   if (i.prefix[DATA_PREFIX] != 0)
8211     {
8212       prefix = 1;
8213       i.prefixes -= 1;
8214       code16 ^= CODE16;
8215     }
8216
8217   gas_assert (!i.prefix[REX_PREFIX]);
8218
8219   size = 4;
8220   if (code16)
8221     size = 2;
8222
8223   if (i.prefixes != 0)
8224     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8225
8226   /* 1 opcode; 2 segment; offset  */
8227   p = frag_more (prefix + 1 + 2 + size);
8228
8229   if (i.prefix[DATA_PREFIX] != 0)
8230     *p++ = DATA_PREFIX_OPCODE;
8231
8232   if (i.prefix[REX_PREFIX] != 0)
8233     *p++ = i.prefix[REX_PREFIX];
8234
8235   *p++ = i.tm.base_opcode;
8236   if (i.op[1].imms->X_op == O_constant)
8237     {
8238       offsetT n = i.op[1].imms->X_add_number;
8239
8240       if (size == 2
8241           && !fits_in_unsigned_word (n)
8242           && !fits_in_signed_word (n))
8243         {
8244           as_bad (_("16-bit jump out of range"));
8245           return;
8246         }
8247       md_number_to_chars (p, n, size);
8248     }
8249   else
8250     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8251                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
8252   if (i.op[0].imms->X_op != O_constant)
8253     as_bad (_("can't handle non absolute segment in `%s'"),
8254             i.tm.name);
8255   md_number_to_chars (p + size, (valueT) i.op[0].imms->X_add_number, 2);
8256 }
8257
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* Emit the .note.gnu.property section recording the ISA and feature
   bits collected in x86_isa_1_used and x86_feature_2_used.  Does
   nothing unless the output is ELF and x86_used_note is set.  */

void
x86_cleanup (void)
{
  asection *saved_seg = now_seg;
  subsegT saved_subseg = now_subseg;
  asection *note_sec;
  unsigned int align_power, align_mask;
  unsigned int isa_raw, isa_size;
  unsigned int feature_raw, feature_size;
  unsigned int descsz;
  char *note, *prop;

  if (!IS_ELF || !x86_used_note)
    return;

  x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;

  /* Layout of the note written below:

     Field      Length          Contents
     ----       ----            ----
     n_namsz    4               4
     n_descsz   4               The note descriptor size
     n_type     4               NT_GNU_PROPERTY_TYPE_0
     n_name     4               "GNU"
     n_desc     n_descsz        The program property array

     The property array holds two entries, each made of a 4-byte
     type, a 4-byte data size and 4 bytes of data, zero padded up to
     the alignment of the ELF class.  */

  /* Create the .note.gnu.property section.  */
  note_sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
  bfd_set_section_flags (note_sec,
                         (SEC_ALLOC | SEC_LOAD | SEC_DATA
                          | SEC_HAS_CONTENTS | SEC_READONLY));

  /* 8 byte alignment for ELFCLASS64, 4 byte alignment otherwise.  */
  align_power
    = (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64
       ? 3 : 2);
  align_mask = (1u << align_power) - 1;

  bfd_set_section_alignment (note_sec, align_power);
  elf_section_type (note_sec) = SHT_NOTE;

  /* GNU_PROPERTY_X86_ISA_1_USED: type + data size + data, then
     rounded up to the alignment.  */
  isa_raw = 4 + 4 + 4;
  isa_size = (isa_raw + align_mask) & ~align_mask;

  /* GNU_PROPERTY_X86_FEATURE_2_USED follows the aligned first entry;
     these two are running totals covering both entries.  */
  feature_raw = isa_size + 4 + 4 + 4;
  feature_size = (feature_raw + align_mask) & ~align_mask;

  descsz = feature_size;

  /* n_namsz + n_descsz + n_type + n_name, then the descriptor.  */
  note = frag_more (4 + 4 + 4 + 4 + descsz);

  /* Note header and name.  */
  md_number_to_chars (note, (valueT) 4, 4);
  md_number_to_chars (note + 4, (valueT) descsz, 4);
  md_number_to_chars (note + 8, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
  memcpy (note + 12, "GNU", 4);

  /* First property: type, data size, data, zeroed padding.  */
  prop = note + 16;
  md_number_to_chars (prop, (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
  md_number_to_chars (prop + 4, (valueT) 4, 4);
  md_number_to_chars (prop + 8, (valueT) x86_isa_1_used, 4);
  if (isa_size != isa_raw)
    memset (prop + 12, 0, isa_size - isa_raw);

  /* Second property, laid out the same way.  */
  prop += isa_size;
  md_number_to_chars (prop, (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
  md_number_to_chars (prop + 4, (valueT) 4, 4);
  md_number_to_chars (prop + 8, (valueT) x86_feature_2_used, 4);
  if (feature_size != feature_raw)
    memset (prop + 12, 0, feature_size - feature_raw);

  /* We probably can't restore the current segment, for there likely
     isn't one yet...  */
  if (saved_seg && saved_subseg)
    subseg_set (saved_seg, saved_subseg);
}
#endif
8378
8379 static unsigned int
8380 encoding_length (const fragS *start_frag, offsetT start_off,
8381                  const char *frag_now_ptr)
8382 {
8383   unsigned int len = 0;
8384
8385   if (start_frag != frag_now)
8386     {
8387       const fragS *fr = start_frag;
8388
8389       do {
8390         len += fr->fr_fix;
8391         fr = fr->fr_next;
8392       } while (fr && fr != frag_now);
8393     }
8394
8395   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
8396 }
8397
8398 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
8399    be macro-fused with conditional jumps.
8400    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
8401    or is one of the following format:
8402
8403     cmp m, imm
8404     add m, imm
8405     sub m, imm
8406    test m, imm
8407     and m, imm
8408     inc m
8409     dec m
8410
8411    it is unfusible.  */
8412
8413 static int
8414 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
8415 {
8416   /* No RIP address.  */
8417   if (i.base_reg && i.base_reg->reg_num == RegIP)
8418     return 0;
8419
8420   /* No VEX/EVEX encoding.  */
8421   if (is_any_vex_encoding (&i.tm))
8422     return 0;
8423
8424   /* add, sub without add/sub m, imm.  */
8425   if (i.tm.base_opcode <= 5
8426       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
8427       || ((i.tm.base_opcode | 3) == 0x83
8428           && (i.tm.extension_opcode == 0x5
8429               || i.tm.extension_opcode == 0x0)))
8430     {
8431       *mf_cmp_p = mf_cmp_alu_cmp;
8432       return !(i.mem_operands && i.imm_operands);
8433     }
8434
8435   /* and without and m, imm.  */
8436   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
8437       || ((i.tm.base_opcode | 3) == 0x83
8438           && i.tm.extension_opcode == 0x4))
8439     {
8440       *mf_cmp_p = mf_cmp_test_and;
8441       return !(i.mem_operands && i.imm_operands);
8442     }
8443
8444   /* test without test m imm.  */
8445   if ((i.tm.base_opcode | 1) == 0x85
8446       || (i.tm.base_opcode | 1) == 0xa9
8447       || ((i.tm.base_opcode | 1) == 0xf7
8448           && i.tm.extension_opcode == 0))
8449     {
8450       *mf_cmp_p = mf_cmp_test_and;
8451       return !(i.mem_operands && i.imm_operands);
8452     }
8453
8454   /* cmp without cmp m, imm.  */
8455   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
8456       || ((i.tm.base_opcode | 3) == 0x83
8457           && (i.tm.extension_opcode == 0x7)))
8458     {
8459       *mf_cmp_p = mf_cmp_alu_cmp;
8460       return !(i.mem_operands && i.imm_operands);
8461     }
8462
8463   /* inc, dec without inc/dec m.   */
8464   if ((i.tm.cpu_flags.bitfield.cpuno64
8465        && (i.tm.base_opcode | 0xf) == 0x4f)
8466       || ((i.tm.base_opcode | 1) == 0xff
8467           && i.tm.extension_opcode <= 0x1))
8468     {
8469       *mf_cmp_p = mf_cmp_incdec;
8470       return !i.mem_operands;
8471     }
8472
8473   return 0;
8474 }
8475
8476 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
8477
8478 static int
8479 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
8480 {
8481   /* NB: Don't work with COND_JUMP86 without i386.  */
8482   if (!align_branch_power
8483       || now_seg == absolute_section
8484       || !cpu_arch_flags.bitfield.cpui386
8485       || !(align_branch & align_branch_fused_bit))
8486     return 0;
8487
8488   if (maybe_fused_with_jcc_p (mf_cmp_p))
8489     {
8490       if (last_insn.kind == last_insn_other
8491           || last_insn.seg != now_seg)
8492         return 1;
8493       if (flag_debug)
8494         as_warn_where (last_insn.file, last_insn.line,
8495                        _("`%s` skips -malign-branch-boundary on `%s`"),
8496                        last_insn.name, i.tm.name);
8497     }
8498
8499   return 0;
8500 }
8501
8502 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
8503
8504 static int
8505 add_branch_prefix_frag_p (void)
8506 {
8507   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
8508      to PadLock instructions since they include prefixes in opcode.  */
8509   if (!align_branch_power
8510       || !align_branch_prefix_size
8511       || now_seg == absolute_section
8512       || i.tm.cpu_flags.bitfield.cpupadlock
8513       || !cpu_arch_flags.bitfield.cpui386)
8514     return 0;
8515
8516   /* Don't add prefix if it is a prefix or there is no operand in case
8517      that segment prefix is special.  */
8518   if (!i.operands || i.tm.opcode_modifier.isprefix)
8519     return 0;
8520
8521   if (last_insn.kind == last_insn_other
8522       || last_insn.seg != now_seg)
8523     return 1;
8524
8525   if (flag_debug)
8526     as_warn_where (last_insn.file, last_insn.line,
8527                    _("`%s` skips -malign-branch-boundary on `%s`"),
8528                    last_insn.name, i.tm.name);
8529
8530   return 0;
8531 }
8532
8533 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
8534
8535 static int
8536 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
8537                            enum mf_jcc_kind *mf_jcc_p)
8538 {
8539   int add_padding;
8540
8541   /* NB: Don't work with COND_JUMP86 without i386.  */
8542   if (!align_branch_power
8543       || now_seg == absolute_section
8544       || !cpu_arch_flags.bitfield.cpui386)
8545     return 0;
8546
8547   add_padding = 0;
8548
8549   /* Check for jcc and direct jmp.  */
8550   if (i.tm.opcode_modifier.jump == JUMP)
8551     {
8552       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
8553         {
8554           *branch_p = align_branch_jmp;
8555           add_padding = align_branch & align_branch_jmp_bit;
8556         }
8557       else
8558         {
8559           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
8560              igore the lowest bit.  */
8561           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
8562           *branch_p = align_branch_jcc;
8563           if ((align_branch & align_branch_jcc_bit))
8564             add_padding = 1;
8565         }
8566     }
8567   else if (is_any_vex_encoding (&i.tm))
8568     return 0;
8569   else if ((i.tm.base_opcode | 1) == 0xc3)
8570     {
8571       /* Near ret.  */
8572       *branch_p = align_branch_ret;
8573       if ((align_branch & align_branch_ret_bit))
8574         add_padding = 1;
8575     }
8576   else
8577     {
8578       /* Check for indirect jmp, direct and indirect calls.  */
8579       if (i.tm.base_opcode == 0xe8)
8580         {
8581           /* Direct call.  */
8582           *branch_p = align_branch_call;
8583           if ((align_branch & align_branch_call_bit))
8584             add_padding = 1;
8585         }
8586       else if (i.tm.base_opcode == 0xff
8587                && (i.tm.extension_opcode == 2
8588                    || i.tm.extension_opcode == 4))
8589         {
8590           /* Indirect call and jmp.  */
8591           *branch_p = align_branch_indirect;
8592           if ((align_branch & align_branch_indirect_bit))
8593             add_padding = 1;
8594         }
8595
8596       if (add_padding
8597           && i.disp_operands
8598           && tls_get_addr
8599           && (i.op[0].disps->X_op == O_symbol
8600               || (i.op[0].disps->X_op == O_subtract
8601                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
8602         {
8603           symbolS *s = i.op[0].disps->X_add_symbol;
8604           /* No padding to call to global or undefined tls_get_addr.  */
8605           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
8606               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
8607             return 0;
8608         }
8609     }
8610
8611   if (add_padding
8612       && last_insn.kind != last_insn_other
8613       && last_insn.seg == now_seg)
8614     {
8615       if (flag_debug)
8616         as_warn_where (last_insn.file, last_insn.line,
8617                        _("`%s` skips -malign-branch-boundary on `%s`"),
8618                        last_insn.name, i.tm.name);
8619       return 0;
8620     }
8621
8622   return add_padding;
8623 }
8624
8625 static void
8626 output_insn (void)
8627 {
8628   fragS *insn_start_frag;
8629   offsetT insn_start_off;
8630   fragS *fragP = NULL;
8631   enum align_branch_kind branch = align_branch_none;
8632   /* The initializer is arbitrary just to avoid uninitialized error.
8633      it's actually either assigned in add_branch_padding_frag_p
8634      or never be used.  */
8635   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
8636
8637 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8638   if (IS_ELF && x86_used_note)
8639     {
8640       if (i.tm.cpu_flags.bitfield.cpucmov)
8641         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV;
8642       if (i.tm.cpu_flags.bitfield.cpusse)
8643         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE;
8644       if (i.tm.cpu_flags.bitfield.cpusse2)
8645         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2;
8646       if (i.tm.cpu_flags.bitfield.cpusse3)
8647         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3;
8648       if (i.tm.cpu_flags.bitfield.cpussse3)
8649         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3;
8650       if (i.tm.cpu_flags.bitfield.cpusse4_1)
8651         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1;
8652       if (i.tm.cpu_flags.bitfield.cpusse4_2)
8653         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2;
8654       if (i.tm.cpu_flags.bitfield.cpuavx)
8655         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX;
8656       if (i.tm.cpu_flags.bitfield.cpuavx2)
8657         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX2;
8658       if (i.tm.cpu_flags.bitfield.cpufma)
8659         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_FMA;
8660       if (i.tm.cpu_flags.bitfield.cpuavx512f)
8661         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512F;
8662       if (i.tm.cpu_flags.bitfield.cpuavx512cd)
8663         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512CD;
8664       if (i.tm.cpu_flags.bitfield.cpuavx512er)
8665         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512ER;
8666       if (i.tm.cpu_flags.bitfield.cpuavx512pf)
8667         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512PF;
8668       if (i.tm.cpu_flags.bitfield.cpuavx512vl)
8669         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512VL;
8670       if (i.tm.cpu_flags.bitfield.cpuavx512dq)
8671         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512DQ;
8672       if (i.tm.cpu_flags.bitfield.cpuavx512bw)
8673         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512BW;
8674       if (i.tm.cpu_flags.bitfield.cpuavx512_4fmaps)
8675         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS;
8676       if (i.tm.cpu_flags.bitfield.cpuavx512_4vnniw)
8677         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW;
8678       if (i.tm.cpu_flags.bitfield.cpuavx512_bitalg)
8679         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BITALG;
8680       if (i.tm.cpu_flags.bitfield.cpuavx512ifma)
8681         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_IFMA;
8682       if (i.tm.cpu_flags.bitfield.cpuavx512vbmi)
8683         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI;
8684       if (i.tm.cpu_flags.bitfield.cpuavx512_vbmi2)
8685         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
8686       if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
8687         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
8688       if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
8689         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
8690
8691       if (i.tm.cpu_flags.bitfield.cpu8087
8692           || i.tm.cpu_flags.bitfield.cpu287
8693           || i.tm.cpu_flags.bitfield.cpu387
8694           || i.tm.cpu_flags.bitfield.cpu687
8695           || i.tm.cpu_flags.bitfield.cpufisttp)
8696         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
8697       if (i.has_regmmx
8698           || i.tm.base_opcode == 0xf77 /* emms */
8699           || i.tm.base_opcode == 0xf0e /* femms */
8700           || i.tm.base_opcode == 0xf2a /* cvtpi2ps */
8701           || i.tm.base_opcode == 0x660f2a /* cvtpi2pd */)
8702         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
8703       if (i.has_regxmm)
8704         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
8705       if (i.has_regymm)
8706         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
8707       if (i.has_regzmm)
8708         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
8709       if (i.tm.cpu_flags.bitfield.cpufxsr)
8710         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
8711       if (i.tm.cpu_flags.bitfield.cpuxsave)
8712         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
8713       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
8714         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
8715       if (i.tm.cpu_flags.bitfield.cpuxsavec)
8716         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
8717     }
8718 #endif
8719
8720   /* Tie dwarf2 debug info to the address at the start of the insn.
8721      We can't do this after the insn has been output as the current
8722      frag may have been closed off.  eg. by frag_var.  */
8723   dwarf2_emit_insn (0);
8724
8725   insn_start_frag = frag_now;
8726   insn_start_off = frag_now_fix ();
8727
8728   if (add_branch_padding_frag_p (&branch, &mf_jcc))
8729     {
8730       char *p;
8731       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
8732       unsigned int max_branch_padding_size = 14;
8733
8734       /* Align section to boundary.  */
8735       record_alignment (now_seg, align_branch_power);
8736
8737       /* Make room for padding.  */
8738       frag_grow (max_branch_padding_size);
8739
8740       /* Start of the padding.  */
8741       p = frag_more (0);
8742
8743       fragP = frag_now;
8744
8745       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
8746                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
8747                 NULL, 0, p);
8748
8749       fragP->tc_frag_data.mf_type = mf_jcc;
8750       fragP->tc_frag_data.branch_type = branch;
8751       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
8752     }
8753
8754   /* Output jumps.  */
8755   if (i.tm.opcode_modifier.jump == JUMP)
8756     output_branch ();
8757   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
8758            || i.tm.opcode_modifier.jump == JUMP_DWORD)
8759     output_jump ();
8760   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
8761     output_interseg_jump ();
8762   else
8763     {
8764       /* Output normal instructions here.  */
8765       char *p;
8766       unsigned char *q;
8767       unsigned int j;
8768       unsigned int prefix;
8769       enum mf_cmp_kind mf_cmp;
8770
8771       if (avoid_fence
8772           && (i.tm.base_opcode == 0xfaee8
8773               || i.tm.base_opcode == 0xfaef0
8774               || i.tm.base_opcode == 0xfaef8))
8775         {
8776           /* Encode lfence, mfence, and sfence as
8777              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
8778           offsetT val = 0x240483f0ULL;
8779           p = frag_more (5);
8780           md_number_to_chars (p, val, 5);
8781           return;
8782         }
8783
8784       /* Some processors fail on LOCK prefix. This options makes
8785          assembler ignore LOCK prefix and serves as a workaround.  */
8786       if (omit_lock_prefix)
8787         {
8788           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
8789             return;
8790           i.prefix[LOCK_PREFIX] = 0;
8791         }
8792
8793       if (branch)
8794         /* Skip if this is a branch.  */
8795         ;
8796       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
8797         {
8798           /* Make room for padding.  */
8799           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
8800           p = frag_more (0);
8801
8802           fragP = frag_now;
8803
8804           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
8805                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
8806                     NULL, 0, p);
8807
8808           fragP->tc_frag_data.mf_type = mf_cmp;
8809           fragP->tc_frag_data.branch_type = align_branch_fused;
8810           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
8811         }
8812       else if (add_branch_prefix_frag_p ())
8813         {
8814           unsigned int max_prefix_size = align_branch_prefix_size;
8815
8816           /* Make room for padding.  */
8817           frag_grow (max_prefix_size);
8818           p = frag_more (0);
8819
8820           fragP = frag_now;
8821
8822           frag_var (rs_machine_dependent, max_prefix_size, 0,
8823                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
8824                     NULL, 0, p);
8825
8826           fragP->tc_frag_data.max_bytes = max_prefix_size;
8827         }
8828
8829       /* Since the VEX/EVEX prefix contains the implicit prefix, we
8830          don't need the explicit prefix.  */
8831       if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
8832         {
8833           switch (i.tm.opcode_length)
8834             {
8835             case 3:
8836               if (i.tm.base_opcode & 0xff000000)
8837                 {
8838                   prefix = (i.tm.base_opcode >> 24) & 0xff;
8839                   if (!i.tm.cpu_flags.bitfield.cpupadlock
8840                       || prefix != REPE_PREFIX_OPCODE
8841                       || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
8842                     add_prefix (prefix);
8843                 }
8844               break;
8845             case 2:
8846               if ((i.tm.base_opcode & 0xff0000) != 0)
8847                 {
8848                   prefix = (i.tm.base_opcode >> 16) & 0xff;
8849                   add_prefix (prefix);
8850                 }
8851               break;
8852             case 1:
8853               break;
8854             case 0:
8855               /* Check for pseudo prefixes.  */
8856               as_bad_where (insn_start_frag->fr_file,
8857                             insn_start_frag->fr_line,
8858                              _("pseudo prefix without instruction"));
8859               return;
8860             default:
8861               abort ();
8862             }
8863
8864 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
8865           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
8866              R_X86_64_GOTTPOFF relocation so that linker can safely
8867              perform IE->LE optimization.  A dummy REX_OPCODE prefix
8868              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
8869              relocation for GDesc -> IE/LE optimization.  */
8870           if (x86_elf_abi == X86_64_X32_ABI
8871               && i.operands == 2
8872               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
8873                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
8874               && i.prefix[REX_PREFIX] == 0)
8875             add_prefix (REX_OPCODE);
8876 #endif
8877
8878           /* The prefix bytes.  */
8879           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
8880             if (*q)
8881               FRAG_APPEND_1_CHAR (*q);
8882         }
8883       else
8884         {
8885           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
8886             if (*q)
8887               switch (j)
8888                 {
8889                 case REX_PREFIX:
8890                   /* REX byte is encoded in VEX prefix.  */
8891                   break;
8892                 case SEG_PREFIX:
8893                 case ADDR_PREFIX:
8894                   FRAG_APPEND_1_CHAR (*q);
8895                   break;
8896                 default:
8897                   /* There should be no other prefixes for instructions
8898                      with VEX prefix.  */
8899                   abort ();
8900                 }
8901
8902           /* For EVEX instructions i.vrex should become 0 after
8903              build_evex_prefix.  For VEX instructions upper 16 registers
8904              aren't available, so VREX should be 0.  */
8905           if (i.vrex)
8906             abort ();
8907           /* Now the VEX prefix.  */
8908           p = frag_more (i.vex.length);
8909           for (j = 0; j < i.vex.length; j++)
8910             p[j] = i.vex.bytes[j];
8911         }
8912
8913       /* Now the opcode; be careful about word order here!  */
8914       if (i.tm.opcode_length == 1)
8915         {
8916           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
8917         }
8918       else
8919         {
8920           switch (i.tm.opcode_length)
8921             {
8922             case 4:
8923               p = frag_more (4);
8924               *p++ = (i.tm.base_opcode >> 24) & 0xff;
8925               *p++ = (i.tm.base_opcode >> 16) & 0xff;
8926               break;
8927             case 3:
8928               p = frag_more (3);
8929               *p++ = (i.tm.base_opcode >> 16) & 0xff;
8930               break;
8931             case 2:
8932               p = frag_more (2);
8933               break;
8934             default:
8935               abort ();
8936               break;
8937             }
8938
8939           /* Put out high byte first: can't use md_number_to_chars!  */
8940           *p++ = (i.tm.base_opcode >> 8) & 0xff;
8941           *p = i.tm.base_opcode & 0xff;
8942         }
8943
8944       /* Now the modrm byte and sib byte (if present).  */
8945       if (i.tm.opcode_modifier.modrm)
8946         {
8947           FRAG_APPEND_1_CHAR ((i.rm.regmem << 0
8948                                | i.rm.reg << 3
8949                                | i.rm.mode << 6));
8950           /* If i.rm.regmem == ESP (4)
8951              && i.rm.mode != (Register mode)
8952              && not 16 bit
8953              ==> need second modrm byte.  */
8954           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
8955               && i.rm.mode != 3
8956               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
8957             FRAG_APPEND_1_CHAR ((i.sib.base << 0
8958                                  | i.sib.index << 3
8959                                  | i.sib.scale << 6));
8960         }
8961
8962       if (i.disp_operands)
8963         output_disp (insn_start_frag, insn_start_off);
8964
8965       if (i.imm_operands)
8966         output_imm (insn_start_frag, insn_start_off);
8967
8968       /*
8969        * frag_now_fix () returning plain abs_section_offset when we're in the
8970        * absolute section, and abs_section_offset not getting updated as data
8971        * gets added to the frag breaks the logic below.
8972        */
8973       if (now_seg != absolute_section)
8974         {
8975           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
8976           if (j > 15)
8977             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
8978                      j);
8979           else if (fragP)
8980             {
8981               /* NB: Don't add prefix with GOTPC relocation since
8982                  output_disp() above depends on the fixed encoding
8983                  length.  Can't add prefix with TLS relocation since
8984                  it breaks TLS linker optimization.  */
8985               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
8986               /* Prefix count on the current instruction.  */
8987               unsigned int count = i.vex.length;
8988               unsigned int k;
8989               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
8990                 /* REX byte is encoded in VEX/EVEX prefix.  */
8991                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
8992                   count++;
8993
8994               /* Count prefixes for extended opcode maps.  */
8995               if (!i.vex.length)
8996                 switch (i.tm.opcode_length)
8997                   {
8998                   case 3:
8999                     if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
9000                       {
9001                         count++;
9002                         switch ((i.tm.base_opcode >> 8) & 0xff)
9003                           {
9004                           case 0x38:
9005                           case 0x3a:
9006                             count++;
9007                             break;
9008                           default:
9009                             break;
9010                           }
9011                       }
9012                     break;
9013                   case 2:
9014                     if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
9015                       count++;
9016                     break;
9017                   case 1:
9018                     break;
9019                   default:
9020                     abort ();
9021                   }
9022
9023               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9024                   == BRANCH_PREFIX)
9025                 {
9026                   /* Set the maximum prefix size in BRANCH_PREFIX
9027                      frag.  */
9028                   if (fragP->tc_frag_data.max_bytes > max)
9029                     fragP->tc_frag_data.max_bytes = max;
9030                   if (fragP->tc_frag_data.max_bytes > count)
9031                     fragP->tc_frag_data.max_bytes -= count;
9032                   else
9033                     fragP->tc_frag_data.max_bytes = 0;
9034                 }
9035               else
9036                 {
9037                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
9038                      frag.  */
9039                   unsigned int max_prefix_size;
9040                   if (align_branch_prefix_size > max)
9041                     max_prefix_size = max;
9042                   else
9043                     max_prefix_size = align_branch_prefix_size;
9044                   if (max_prefix_size > count)
9045                     fragP->tc_frag_data.max_prefix_length
9046                       = max_prefix_size - count;
9047                 }
9048
9049               /* Use existing segment prefix if possible.  Use CS
9050                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
9051                  segment prefix with ESP/EBP base register and use DS
9052                  segment prefix without ESP/EBP base register.  */
9053               if (i.prefix[SEG_PREFIX])
9054                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9055               else if (flag_code == CODE_64BIT)
9056                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9057               else if (i.base_reg
9058                        && (i.base_reg->reg_num == 4
9059                            || i.base_reg->reg_num == 5))
9060                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9061               else
9062                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9063             }
9064         }
9065     }
9066
9067   /* NB: Don't work with COND_JUMP86 without i386.  */
9068   if (align_branch_power
9069       && now_seg != absolute_section
9070       && cpu_arch_flags.bitfield.cpui386)
9071     {
9072       /* Terminate each frag so that we can add prefix and check for
9073          fused jcc.  */
9074       frag_wane (frag_now);
9075       frag_new (0);
9076     }
9077
9078 #ifdef DEBUG386
9079   if (flag_debug)
9080     {
9081       pi ("" /*line*/, &i);
9082     }
9083 #endif /* DEBUG386  */
9084 }
9085
9086 /* Return the size of the displacement operand N.  */
9087
9088 static int
9089 disp_size (unsigned int n)
9090 {
9091   int size = 4;
9092
9093   if (i.types[n].bitfield.disp64)
9094     size = 8;
9095   else if (i.types[n].bitfield.disp8)
9096     size = 1;
9097   else if (i.types[n].bitfield.disp16)
9098     size = 2;
9099   return size;
9100 }
9101
9102 /* Return the size of the immediate operand N.  */
9103
9104 static int
9105 imm_size (unsigned int n)
9106 {
9107   int size = 4;
9108   if (i.types[n].bitfield.imm64)
9109     size = 8;
9110   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9111     size = 1;
9112   else if (i.types[n].bitfield.imm16)
9113     size = 2;
9114   return size;
9115 }
9116
9117 static void
9118 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9119 {
9120   char *p;
9121   unsigned int n;
9122
9123   for (n = 0; n < i.operands; n++)
9124     {
9125       if (operand_type_check (i.types[n], disp))
9126         {
9127           if (i.op[n].disps->X_op == O_constant)
9128             {
9129               int size = disp_size (n);
9130               offsetT val = i.op[n].disps->X_add_number;
9131
9132               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9133                                      size);
9134               p = frag_more (size);
9135               md_number_to_chars (p, val, size);
9136             }
9137           else
9138             {
9139               enum bfd_reloc_code_real reloc_type;
9140               int size = disp_size (n);
9141               int sign = i.types[n].bitfield.disp32s;
9142               int pcrel = (i.flags[n] & Operand_PCrel) != 0;
9143               fixS *fixP;
9144
9145               /* We can't have 8 bit displacement here.  */
9146               gas_assert (!i.types[n].bitfield.disp8);
9147
9148               /* The PC relative address is computed relative
9149                  to the instruction boundary, so in case immediate
9150                  fields follows, we need to adjust the value.  */
9151               if (pcrel && i.imm_operands)
9152                 {
9153                   unsigned int n1;
9154                   int sz = 0;
9155
9156                   for (n1 = 0; n1 < i.operands; n1++)
9157                     if (operand_type_check (i.types[n1], imm))
9158                       {
9159                         /* Only one immediate is allowed for PC
9160                            relative address.  */
9161                         gas_assert (sz == 0);
9162                         sz = imm_size (n1);
9163                         i.op[n].disps->X_add_number -= sz;
9164                       }
9165                   /* We should find the immediate.  */
9166                   gas_assert (sz != 0);
9167                 }
9168
9169               p = frag_more (size);
9170               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9171               if (GOT_symbol
9172                   && GOT_symbol == i.op[n].disps->X_add_symbol
9173                   && (((reloc_type == BFD_RELOC_32
9174                         || reloc_type == BFD_RELOC_X86_64_32S
9175                         || (reloc_type == BFD_RELOC_64
9176                             && object_64bit))
9177                        && (i.op[n].disps->X_op == O_symbol
9178                            || (i.op[n].disps->X_op == O_add
9179                                && ((symbol_get_value_expression
9180                                     (i.op[n].disps->X_op_symbol)->X_op)
9181                                    == O_subtract))))
9182                       || reloc_type == BFD_RELOC_32_PCREL))
9183                 {
9184                   if (!object_64bit)
9185                     {
9186                       reloc_type = BFD_RELOC_386_GOTPC;
9187                       i.has_gotpc_tls_reloc = TRUE;
9188                       i.op[n].imms->X_add_number +=
9189                         encoding_length (insn_start_frag, insn_start_off, p);
9190                     }
9191                   else if (reloc_type == BFD_RELOC_64)
9192                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
9193                   else
9194                     /* Don't do the adjustment for x86-64, as there
9195                        the pcrel addressing is relative to the _next_
9196                        insn, and that is taken care of in other code.  */
9197                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
9198                 }
9199               else if (align_branch_power)
9200                 {
9201                   switch (reloc_type)
9202                     {
9203                     case BFD_RELOC_386_TLS_GD:
9204                     case BFD_RELOC_386_TLS_LDM:
9205                     case BFD_RELOC_386_TLS_IE:
9206                     case BFD_RELOC_386_TLS_IE_32:
9207                     case BFD_RELOC_386_TLS_GOTIE:
9208                     case BFD_RELOC_386_TLS_GOTDESC:
9209                     case BFD_RELOC_386_TLS_DESC_CALL:
9210                     case BFD_RELOC_X86_64_TLSGD:
9211                     case BFD_RELOC_X86_64_TLSLD:
9212                     case BFD_RELOC_X86_64_GOTTPOFF:
9213                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
9214                     case BFD_RELOC_X86_64_TLSDESC_CALL:
9215                       i.has_gotpc_tls_reloc = TRUE;
9216                     default:
9217                       break;
9218                     }
9219                 }
9220               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
9221                                   size, i.op[n].disps, pcrel,
9222                                   reloc_type);
9223               /* Check for "call/jmp *mem", "mov mem, %reg",
9224                  "test %reg, mem" and "binop mem, %reg" where binop
9225                  is one of adc, add, and, cmp, or, sbb, sub, xor
9226                  instructions without data prefix.  Always generate
9227                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
9228               if (i.prefix[DATA_PREFIX] == 0
9229                   && (generate_relax_relocations
9230                       || (!object_64bit
9231                           && i.rm.mode == 0
9232                           && i.rm.regmem == 5))
9233                   && (i.rm.mode == 2
9234                       || (i.rm.mode == 0 && i.rm.regmem == 5))
9235                   && !is_any_vex_encoding(&i.tm)
9236                   && ((i.operands == 1
9237                        && i.tm.base_opcode == 0xff
9238                        && (i.rm.reg == 2 || i.rm.reg == 4))
9239                       || (i.operands == 2
9240                           && (i.tm.base_opcode == 0x8b
9241                               || i.tm.base_opcode == 0x85
9242                               || (i.tm.base_opcode & ~0x38) == 0x03))))
9243                 {
9244                   if (object_64bit)
9245                     {
9246                       fixP->fx_tcbit = i.rex != 0;
9247                       if (i.base_reg
9248                           && (i.base_reg->reg_num == RegIP))
9249                       fixP->fx_tcbit2 = 1;
9250                     }
9251                   else
9252                     fixP->fx_tcbit2 = 1;
9253                 }
9254             }
9255         }
9256     }
9257 }
9258
9259 static void
9260 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
9261 {
9262   char *p;
9263   unsigned int n;
9264
9265   for (n = 0; n < i.operands; n++)
9266     {
9267       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
9268       if (i.rounding && (int) n == i.rounding->operand)
9269         continue;
9270
9271       if (operand_type_check (i.types[n], imm))
9272         {
9273           if (i.op[n].imms->X_op == O_constant)
9274             {
9275               int size = imm_size (n);
9276               offsetT val;
9277
9278               val = offset_in_range (i.op[n].imms->X_add_number,
9279                                      size);
9280               p = frag_more (size);
9281               md_number_to_chars (p, val, size);
9282             }
9283           else
9284             {
9285               /* Not absolute_section.
9286                  Need a 32-bit fixup (don't support 8bit
9287                  non-absolute imms).  Try to support other
9288                  sizes ...  */
9289               enum bfd_reloc_code_real reloc_type;
9290               int size = imm_size (n);
9291               int sign;
9292
9293               if (i.types[n].bitfield.imm32s
9294                   && (i.suffix == QWORD_MNEM_SUFFIX
9295                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
9296                 sign = 1;
9297               else
9298                 sign = 0;
9299
9300               p = frag_more (size);
9301               reloc_type = reloc (size, 0, sign, i.reloc[n]);
9302
9303               /*   This is tough to explain.  We end up with this one if we
9304                * have operands that look like
9305                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
9306                * obtain the absolute address of the GOT, and it is strongly
9307                * preferable from a performance point of view to avoid using
9308                * a runtime relocation for this.  The actual sequence of
9309                * instructions often look something like:
9310                *
9311                *        call    .L66
9312                * .L66:
9313                *        popl    %ebx
9314                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
9315                *
9316                *   The call and pop essentially return the absolute address
9317                * of the label .L66 and store it in %ebx.  The linker itself
9318                * will ultimately change the first operand of the addl so
9319                * that %ebx points to the GOT, but to keep things simple, the
9320                * .o file must have this operand set so that it generates not
9321                * the absolute address of .L66, but the absolute address of
9322                * itself.  This allows the linker itself simply treat a GOTPC
9323                * relocation as asking for a pcrel offset to the GOT to be
9324                * added in, and the addend of the relocation is stored in the
9325                * operand field for the instruction itself.
9326                *
9327                *   Our job here is to fix the operand so that it would add
9328                * the correct offset so that %ebx would point to itself.  The
9329                * thing that is tricky is that .-.L66 will point to the
9330                * beginning of the instruction, so we need to further modify
9331                * the operand so that it will point to itself.  There are
9332                * other cases where you have something like:
9333                *
9334                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
9335                *
9336                * and here no correction would be required.  Internally in
9337                * the assembler we treat operands of this form as not being
9338                * pcrel since the '.' is explicitly mentioned, and I wonder
9339                * whether it would simplify matters to do it this way.  Who
9340                * knows.  In earlier versions of the PIC patches, the
9341                * pcrel_adjust field was used to store the correction, but
9342                * since the expression is not pcrel, I felt it would be
9343                * confusing to do it this way.  */
9344
9345               if ((reloc_type == BFD_RELOC_32
9346                    || reloc_type == BFD_RELOC_X86_64_32S
9347                    || reloc_type == BFD_RELOC_64)
9348                   && GOT_symbol
9349                   && GOT_symbol == i.op[n].imms->X_add_symbol
9350                   && (i.op[n].imms->X_op == O_symbol
9351                       || (i.op[n].imms->X_op == O_add
9352                           && ((symbol_get_value_expression
9353                                (i.op[n].imms->X_op_symbol)->X_op)
9354                               == O_subtract))))
9355                 {
9356                   if (!object_64bit)
9357                     reloc_type = BFD_RELOC_386_GOTPC;
9358                   else if (size == 4)
9359                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
9360                   else if (size == 8)
9361                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
9362                   i.has_gotpc_tls_reloc = TRUE;
9363                   i.op[n].imms->X_add_number +=
9364                     encoding_length (insn_start_frag, insn_start_off, p);
9365                 }
9366               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9367                            i.op[n].imms, 0, reloc_type);
9368             }
9369         }
9370     }
9371 }
9372 \f
9373 /* x86_cons_fix_new is called via the expression parsing code when a
9374    reloc is needed.  We use this hook to get the correct .got reloc.  */
9375 static int cons_sign = -1;
9376
9377 void
9378 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
9379                   expressionS *exp, bfd_reloc_code_real_type r)
9380 {
9381   r = reloc (len, 0, cons_sign, r);
9382
9383 #ifdef TE_PE
9384   if (exp->X_op == O_secrel)
9385     {
9386       exp->X_op = O_symbol;
9387       r = BFD_RELOC_32_SECREL;
9388     }
9389 #endif
9390
9391   fix_new_exp (frag, off, len, exp, 0, r);
9392 }
9393
9394 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
9395    purpose of the `.dc.a' internal pseudo-op.  */
9396
9397 int
9398 x86_address_bytes (void)
9399 {
9400   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
9401     return 4;
9402   return stdoutput->arch_info->bits_per_address / 8;
9403 }
9404
9405 #if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
9406     || defined (LEX_AT)
9407 # define lex_got(reloc, adjust, types) NULL
9408 #else
9409 /* Parse operands of the form
9410    <symbol>@GOTOFF+<nnn>
9411    and similar .plt or .got references.
9412
9413    If we find one, set up the correct relocation in RELOC and copy the
9414    input string, minus the `@GOTOFF' into a malloc'd buffer for
9415    parsing by the calling routine.  Return this buffer, and if ADJUST
9416    is non-null set it to the length of the string we removed from the
9417    input line.  Otherwise return NULL.  */
9418 static char *
9419 lex_got (enum bfd_reloc_code_real *rel,
9420          int *adjust,
9421          i386_operand_type *types)
9422 {
9423   /* Some of the relocations depend on the size of what field is to
9424      be relocated.  But in our callers i386_immediate and i386_displacement
9425      we don't yet know the operand size (this will be set by insn
9426      matching).  Hence we record the word32 relocation here,
9427      and adjust the reloc according to the real size in reloc().  */
9428   static const struct {
9429     const char *str;
9430     int len;
9431     const enum bfd_reloc_code_real rel[2];
9432     const i386_operand_type types64;
9433   } gotrel[] = {
9434 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9435     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
9436                                         BFD_RELOC_SIZE32 },
9437       OPERAND_TYPE_IMM32_64 },
9438 #endif
9439     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
9440                                        BFD_RELOC_X86_64_PLTOFF64 },
9441       OPERAND_TYPE_IMM64 },
9442     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
9443                                        BFD_RELOC_X86_64_PLT32    },
9444       OPERAND_TYPE_IMM32_32S_DISP32 },
9445     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
9446                                        BFD_RELOC_X86_64_GOTPLT64 },
9447       OPERAND_TYPE_IMM64_DISP64 },
9448     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
9449                                        BFD_RELOC_X86_64_GOTOFF64 },
9450       OPERAND_TYPE_IMM64_DISP64 },
9451     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
9452                                        BFD_RELOC_X86_64_GOTPCREL },
9453       OPERAND_TYPE_IMM32_32S_DISP32 },
9454     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
9455                                        BFD_RELOC_X86_64_TLSGD    },
9456       OPERAND_TYPE_IMM32_32S_DISP32 },
9457     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
9458                                        _dummy_first_bfd_reloc_code_real },
9459       OPERAND_TYPE_NONE },
9460     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
9461                                        BFD_RELOC_X86_64_TLSLD    },
9462       OPERAND_TYPE_IMM32_32S_DISP32 },
9463     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
9464                                        BFD_RELOC_X86_64_GOTTPOFF },
9465       OPERAND_TYPE_IMM32_32S_DISP32 },
9466     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
9467                                        BFD_RELOC_X86_64_TPOFF32  },
9468       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
9469     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
9470                                        _dummy_first_bfd_reloc_code_real },
9471       OPERAND_TYPE_NONE },
9472     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
9473                                        BFD_RELOC_X86_64_DTPOFF32 },
9474       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
9475     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
9476                                        _dummy_first_bfd_reloc_code_real },
9477       OPERAND_TYPE_NONE },
9478     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
9479                                        _dummy_first_bfd_reloc_code_real },
9480       OPERAND_TYPE_NONE },
9481     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
9482                                        BFD_RELOC_X86_64_GOT32    },
9483       OPERAND_TYPE_IMM32_32S_64_DISP32 },
9484     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
9485                                        BFD_RELOC_X86_64_GOTPC32_TLSDESC },
9486       OPERAND_TYPE_IMM32_32S_DISP32 },
9487     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
9488                                        BFD_RELOC_X86_64_TLSDESC_CALL },
9489       OPERAND_TYPE_IMM32_32S_DISP32 },
9490   };
9491   char *cp;
9492   unsigned int j;
9493
9494 #if defined (OBJ_MAYBE_ELF)
9495   if (!IS_ELF)
9496     return NULL;
9497 #endif
9498
9499   for (cp = input_line_pointer; *cp != '@'; cp++)
9500     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
9501       return NULL;
9502
9503   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
9504     {
9505       int len = gotrel[j].len;
9506       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
9507         {
9508           if (gotrel[j].rel[object_64bit] != 0)
9509             {
9510               int first, second;
9511               char *tmpbuf, *past_reloc;
9512
9513               *rel = gotrel[j].rel[object_64bit];
9514
9515               if (types)
9516                 {
9517                   if (flag_code != CODE_64BIT)
9518                     {
9519                       types->bitfield.imm32 = 1;
9520                       types->bitfield.disp32 = 1;
9521                     }
9522                   else
9523                     *types = gotrel[j].types64;
9524                 }
9525
9526               if (j != 0 && GOT_symbol == NULL)
9527                 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
9528
9529               /* The length of the first part of our input line.  */
9530               first = cp - input_line_pointer;
9531
9532               /* The second part goes from after the reloc token until
9533                  (and including) an end_of_line char or comma.  */
9534               past_reloc = cp + 1 + len;
9535               cp = past_reloc;
9536               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
9537                 ++cp;
9538               second = cp + 1 - past_reloc;
9539
9540               /* Allocate and copy string.  The trailing NUL shouldn't
9541                  be necessary, but be safe.  */
9542               tmpbuf = XNEWVEC (char, first + second + 2);
9543               memcpy (tmpbuf, input_line_pointer, first);
9544               if (second != 0 && *past_reloc != ' ')
9545                 /* Replace the relocation token with ' ', so that
9546                    errors like foo@GOTOFF1 will be detected.  */
9547                 tmpbuf[first++] = ' ';
9548               else
9549                 /* Increment length by 1 if the relocation token is
9550                    removed.  */
9551                 len++;
9552               if (adjust)
9553                 *adjust = len;
9554               memcpy (tmpbuf + first, past_reloc, second);
9555               tmpbuf[first + second] = '\0';
9556               return tmpbuf;
9557             }
9558
9559           as_bad (_("@%s reloc is not supported with %d-bit output format"),
9560                   gotrel[j].str, 1 << (5 + object_64bit));
9561           return NULL;
9562         }
9563     }
9564
9565   /* Might be a symbol version string.  Don't as_bad here.  */
9566   return NULL;
9567 }
9568 #endif
9569
9570 #ifdef TE_PE
9571 #ifdef lex_got
9572 #undef lex_got
9573 #endif
9574 /* Parse operands of the form
9575    <symbol>@SECREL32+<nnn>
9576
9577    If we find one, set up the correct relocation in RELOC and copy the
9578    input string, minus the `@SECREL32' into a malloc'd buffer for
9579    parsing by the calling routine.  Return this buffer, and if ADJUST
9580    is non-null set it to the length of the string we removed from the
9581    input line.  Otherwise return NULL.
9582
9583    This function is copied from the ELF version above adjusted for PE targets.  */
9584
9585 static char *
9586 lex_got (enum bfd_reloc_code_real *rel ATTRIBUTE_UNUSED,
9587          int *adjust ATTRIBUTE_UNUSED,
9588          i386_operand_type *types)
9589 {
9590   static const struct
9591   {
9592     const char *str;
9593     int len;
9594     const enum bfd_reloc_code_real rel[2];
9595     const i386_operand_type types64;
9596   }
9597   gotrel[] =
9598   {
9599     { STRING_COMMA_LEN ("SECREL32"),    { BFD_RELOC_32_SECREL,
9600                                           BFD_RELOC_32_SECREL },
9601       OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
9602   };
9603
9604   char *cp;
9605   unsigned j;
9606
9607   for (cp = input_line_pointer; *cp != '@'; cp++)
9608     if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
9609       return NULL;
9610
9611   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
9612     {
9613       int len = gotrel[j].len;
9614
9615       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
9616         {
9617           if (gotrel[j].rel[object_64bit] != 0)
9618             {
9619               int first, second;
9620               char *tmpbuf, *past_reloc;
9621
9622               *rel = gotrel[j].rel[object_64bit];
9623               if (adjust)
9624                 *adjust = len;
9625
9626               if (types)
9627                 {
9628                   if (flag_code != CODE_64BIT)
9629                     {
9630                       types->bitfield.imm32 = 1;
9631                       types->bitfield.disp32 = 1;
9632                     }
9633                   else
9634                     *types = gotrel[j].types64;
9635                 }
9636
9637               /* The length of the first part of our input line.  */
9638               first = cp - input_line_pointer;
9639
9640               /* The second part goes from after the reloc token until
9641                  (and including) an end_of_line char or comma.  */
9642               past_reloc = cp + 1 + len;
9643               cp = past_reloc;
9644               while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
9645                 ++cp;
9646               second = cp + 1 - past_reloc;
9647
9648               /* Allocate and copy string.  The trailing NUL shouldn't
9649                  be necessary, but be safe.  */
9650               tmpbuf = XNEWVEC (char, first + second + 2);
9651               memcpy (tmpbuf, input_line_pointer, first);
9652               if (second != 0 && *past_reloc != ' ')
9653                 /* Replace the relocation token with ' ', so that
9654                    errors like foo@SECLREL321 will be detected.  */
9655                 tmpbuf[first++] = ' ';
9656               memcpy (tmpbuf + first, past_reloc, second);
9657               tmpbuf[first + second] = '\0';
9658               return tmpbuf;
9659             }
9660
9661           as_bad (_("@%s reloc is not supported with %d-bit output format"),
9662                   gotrel[j].str, 1 << (5 + object_64bit));
9663           return NULL;
9664         }
9665     }
9666
9667   /* Might be a symbol version string.  Don't as_bad here.  */
9668   return NULL;
9669 }
9670
9671 #endif /* TE_PE */
9672
9673 bfd_reloc_code_real_type
9674 x86_cons (expressionS *exp, int size)
9675 {
9676   bfd_reloc_code_real_type got_reloc = NO_RELOC;
9677
9678   intel_syntax = -intel_syntax;
9679
9680   exp->X_md = 0;
9681   if (size == 4 || (object_64bit && size == 8))
9682     {
9683       /* Handle @GOTOFF and the like in an expression.  */
9684       char *save;
9685       char *gotfree_input_line;
9686       int adjust = 0;
9687
9688       save = input_line_pointer;
9689       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
9690       if (gotfree_input_line)
9691         input_line_pointer = gotfree_input_line;
9692
9693       expression (exp);
9694
9695       if (gotfree_input_line)
9696         {
9697           /* expression () has merrily parsed up to the end of line,
9698              or a comma - in the wrong buffer.  Transfer how far
9699              input_line_pointer has moved to the right buffer.  */
9700           input_line_pointer = (save
9701                                 + (input_line_pointer - gotfree_input_line)
9702                                 + adjust);
9703           free (gotfree_input_line);
9704           if (exp->X_op == O_constant
9705               || exp->X_op == O_absent
9706               || exp->X_op == O_illegal
9707               || exp->X_op == O_register
9708               || exp->X_op == O_big)
9709             {
9710               char c = *input_line_pointer;
9711               *input_line_pointer = 0;
9712               as_bad (_("missing or invalid expression `%s'"), save);
9713               *input_line_pointer = c;
9714             }
9715           else if ((got_reloc == BFD_RELOC_386_PLT32
9716                     || got_reloc == BFD_RELOC_X86_64_PLT32)
9717                    && exp->X_op != O_symbol)
9718             {
9719               char c = *input_line_pointer;
9720               *input_line_pointer = 0;
9721               as_bad (_("invalid PLT expression `%s'"), save);
9722               *input_line_pointer = c;
9723             }
9724         }
9725     }
9726   else
9727     expression (exp);
9728
9729   intel_syntax = -intel_syntax;
9730
9731   if (intel_syntax)
9732     i386_intel_simplify (exp);
9733
9734   return got_reloc;
9735 }
9736
9737 static void
9738 signed_cons (int size)
9739 {
9740   if (flag_code == CODE_64BIT)
9741     cons_sign = 1;
9742   cons (size);
9743   cons_sign = -1;
9744 }
9745
9746 #ifdef TE_PE
9747 static void
9748 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
9749 {
9750   expressionS exp;
9751
9752   do
9753     {
9754       expression (&exp);
9755       if (exp.X_op == O_symbol)
9756         exp.X_op = O_secrel;
9757
9758       emit_expr (&exp, 4);
9759     }
9760   while (*input_line_pointer++ == ',');
9761
9762   input_line_pointer--;
9763   demand_empty_rest_of_line ();
9764 }
9765 #endif
9766
9767 /* Handle Vector operations.  */
9768
9769 static char *
9770 check_VecOperations (char *op_string, char *op_end)
9771 {
9772   const reg_entry *mask;
9773   const char *saved;
9774   char *end_op;
9775
9776   while (*op_string
9777          && (op_end == NULL || op_string < op_end))
9778     {
9779       saved = op_string;
9780       if (*op_string == '{')
9781         {
9782           op_string++;
9783
9784           /* Check broadcasts.  */
9785           if (strncmp (op_string, "1to", 3) == 0)
9786             {
9787               int bcst_type;
9788
9789               if (i.broadcast)
9790                 goto duplicated_vec_op;
9791
9792               op_string += 3;
9793               if (*op_string == '8')
9794                 bcst_type = 8;
9795               else if (*op_string == '4')
9796                 bcst_type = 4;
9797               else if (*op_string == '2')
9798                 bcst_type = 2;
9799               else if (*op_string == '1'
9800                        && *(op_string+1) == '6')
9801                 {
9802                   bcst_type = 16;
9803                   op_string++;
9804                 }
9805               else
9806                 {
9807                   as_bad (_("Unsupported broadcast: `%s'"), saved);
9808                   return NULL;
9809                 }
9810               op_string++;
9811
9812               broadcast_op.type = bcst_type;
9813               broadcast_op.operand = this_operand;
9814               broadcast_op.bytes = 0;
9815               i.broadcast = &broadcast_op;
9816             }
9817           /* Check masking operation.  */
9818           else if ((mask = parse_register (op_string, &end_op)) != NULL)
9819             {
9820               /* k0 can't be used for write mask.  */
9821               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
9822                 {
9823                   as_bad (_("`%s%s' can't be used for write mask"),
9824                           register_prefix, mask->reg_name);
9825                   return NULL;
9826                 }
9827
9828               if (!i.mask)
9829                 {
9830                   mask_op.mask = mask;
9831                   mask_op.zeroing = 0;
9832                   mask_op.operand = this_operand;
9833                   i.mask = &mask_op;
9834                 }
9835               else
9836                 {
9837                   if (i.mask->mask)
9838                     goto duplicated_vec_op;
9839
9840                   i.mask->mask = mask;
9841
9842                   /* Only "{z}" is allowed here.  No need to check
9843                      zeroing mask explicitly.  */
9844                   if (i.mask->operand != this_operand)
9845                     {
9846                       as_bad (_("invalid write mask `%s'"), saved);
9847                       return NULL;
9848                     }
9849                 }
9850
9851               op_string = end_op;
9852             }
9853           /* Check zeroing-flag for masking operation.  */
9854           else if (*op_string == 'z')
9855             {
9856               if (!i.mask)
9857                 {
9858                   mask_op.mask = NULL;
9859                   mask_op.zeroing = 1;
9860                   mask_op.operand = this_operand;
9861                   i.mask = &mask_op;
9862                 }
9863               else
9864                 {
9865                   if (i.mask->zeroing)
9866                     {
9867                     duplicated_vec_op:
9868                       as_bad (_("duplicated `%s'"), saved);
9869                       return NULL;
9870                     }
9871
9872                   i.mask->zeroing = 1;
9873
9874                   /* Only "{%k}" is allowed here.  No need to check mask
9875                      register explicitly.  */
9876                   if (i.mask->operand != this_operand)
9877                     {
9878                       as_bad (_("invalid zeroing-masking `%s'"),
9879                               saved);
9880                       return NULL;
9881                     }
9882                 }
9883
9884               op_string++;
9885             }
9886           else
9887             goto unknown_vec_op;
9888
9889           if (*op_string != '}')
9890             {
9891               as_bad (_("missing `}' in `%s'"), saved);
9892               return NULL;
9893             }
9894           op_string++;
9895
9896           /* Strip whitespace since the addition of pseudo prefixes
9897              changed how the scrubber treats '{'.  */
9898           if (is_space_char (*op_string))
9899             ++op_string;
9900
9901           continue;
9902         }
9903     unknown_vec_op:
9904       /* We don't know this one.  */
9905       as_bad (_("unknown vector operation: `%s'"), saved);
9906       return NULL;
9907     }
9908
9909   if (i.mask && i.mask->zeroing && !i.mask->mask)
9910     {
9911       as_bad (_("zeroing-masking only allowed with write mask"));
9912       return NULL;
9913     }
9914
9915   return op_string;
9916 }
9917
9918 static int
9919 i386_immediate (char *imm_start)
9920 {
9921   char *save_input_line_pointer;
9922   char *gotfree_input_line;
9923   segT exp_seg = 0;
9924   expressionS *exp;
9925   i386_operand_type types;
9926
9927   operand_type_set (&types, ~0);
9928
9929   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
9930     {
9931       as_bad (_("at most %d immediate operands are allowed"),
9932               MAX_IMMEDIATE_OPERANDS);
9933       return 0;
9934     }
9935
9936   exp = &im_expressions[i.imm_operands++];
9937   i.op[this_operand].imms = exp;
9938
9939   if (is_space_char (*imm_start))
9940     ++imm_start;
9941
9942   save_input_line_pointer = input_line_pointer;
9943   input_line_pointer = imm_start;
9944
9945   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
9946   if (gotfree_input_line)
9947     input_line_pointer = gotfree_input_line;
9948
9949   exp_seg = expression (exp);
9950
9951   SKIP_WHITESPACE ();
9952
9953   /* Handle vector operations.  */
9954   if (*input_line_pointer == '{')
9955     {
9956       input_line_pointer = check_VecOperations (input_line_pointer,
9957                                                 NULL);
9958       if (input_line_pointer == NULL)
9959         return 0;
9960     }
9961
9962   if (*input_line_pointer)
9963     as_bad (_("junk `%s' after expression"), input_line_pointer);
9964
9965   input_line_pointer = save_input_line_pointer;
9966   if (gotfree_input_line)
9967     {
9968       free (gotfree_input_line);
9969
9970       if (exp->X_op == O_constant || exp->X_op == O_register)
9971         exp->X_op = O_illegal;
9972     }
9973
9974   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
9975 }
9976
9977 static int
9978 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
9979                          i386_operand_type types, const char *imm_start)
9980 {
9981   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
9982     {
9983       if (imm_start)
9984         as_bad (_("missing or invalid immediate expression `%s'"),
9985                 imm_start);
9986       return 0;
9987     }
9988   else if (exp->X_op == O_constant)
9989     {
9990       /* Size it properly later.  */
9991       i.types[this_operand].bitfield.imm64 = 1;
9992       /* If not 64bit, sign extend val.  */
9993       if (flag_code != CODE_64BIT
9994           && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
9995         exp->X_add_number
9996           = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
9997     }
9998 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
9999   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10000            && exp_seg != absolute_section
10001            && exp_seg != text_section
10002            && exp_seg != data_section
10003            && exp_seg != bss_section
10004            && exp_seg != undefined_section
10005            && !bfd_is_com_section (exp_seg))
10006     {
10007       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10008       return 0;
10009     }
10010 #endif
10011   else if (!intel_syntax && exp_seg == reg_section)
10012     {
10013       if (imm_start)
10014         as_bad (_("illegal immediate register operand %s"), imm_start);
10015       return 0;
10016     }
10017   else
10018     {
10019       /* This is an address.  The size of the address will be
10020          determined later, depending on destination register,
10021          suffix, or the default for the section.  */
10022       i.types[this_operand].bitfield.imm8 = 1;
10023       i.types[this_operand].bitfield.imm16 = 1;
10024       i.types[this_operand].bitfield.imm32 = 1;
10025       i.types[this_operand].bitfield.imm32s = 1;
10026       i.types[this_operand].bitfield.imm64 = 1;
10027       i.types[this_operand] = operand_type_and (i.types[this_operand],
10028                                                 types);
10029     }
10030
10031   return 1;
10032 }
10033
10034 static char *
10035 i386_scale (char *scale)
10036 {
10037   offsetT val;
10038   char *save = input_line_pointer;
10039
10040   input_line_pointer = scale;
10041   val = get_absolute_expression ();
10042
10043   switch (val)
10044     {
10045     case 1:
10046       i.log2_scale_factor = 0;
10047       break;
10048     case 2:
10049       i.log2_scale_factor = 1;
10050       break;
10051     case 4:
10052       i.log2_scale_factor = 2;
10053       break;
10054     case 8:
10055       i.log2_scale_factor = 3;
10056       break;
10057     default:
10058       {
10059         char sep = *input_line_pointer;
10060
10061         *input_line_pointer = '\0';
10062         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10063                 scale);
10064         *input_line_pointer = sep;
10065         input_line_pointer = save;
10066         return NULL;
10067       }
10068     }
10069   if (i.log2_scale_factor != 0 && i.index_reg == 0)
10070     {
10071       as_warn (_("scale factor of %d without an index register"),
10072                1 << i.log2_scale_factor);
10073       i.log2_scale_factor = 0;
10074     }
10075   scale = input_line_pointer;
10076   input_line_pointer = save;
10077   return scale;
10078 }
10079
10080 static int
10081 i386_displacement (char *disp_start, char *disp_end)
10082 {
10083   expressionS *exp;
10084   segT exp_seg = 0;
10085   char *save_input_line_pointer;
10086   char *gotfree_input_line;
10087   int override;
10088   i386_operand_type bigdisp, types = anydisp;
10089   int ret;
10090
10091   if (i.disp_operands == MAX_MEMORY_OPERANDS)
10092     {
10093       as_bad (_("at most %d displacement operands are allowed"),
10094               MAX_MEMORY_OPERANDS);
10095       return 0;
10096     }
10097
10098   operand_type_set (&bigdisp, 0);
10099   if (i.jumpabsolute
10100       || i.types[this_operand].bitfield.baseindex
10101       || (current_templates->start->opcode_modifier.jump != JUMP
10102           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10103     {
10104       i386_addressing_mode ();
10105       override = (i.prefix[ADDR_PREFIX] != 0);
10106       if (flag_code == CODE_64BIT)
10107         {
10108           if (!override)
10109             {
10110               bigdisp.bitfield.disp32s = 1;
10111               bigdisp.bitfield.disp64 = 1;
10112             }
10113           else
10114             bigdisp.bitfield.disp32 = 1;
10115         }
10116       else if ((flag_code == CODE_16BIT) ^ override)
10117           bigdisp.bitfield.disp16 = 1;
10118       else
10119           bigdisp.bitfield.disp32 = 1;
10120     }
10121   else
10122     {
10123       /* For PC-relative branches, the width of the displacement may be
10124          dependent upon data size, but is never dependent upon address size.
10125          Also make sure to not unintentionally match against a non-PC-relative
10126          branch template.  */
10127       static templates aux_templates;
10128       const insn_template *t = current_templates->start;
10129       bfd_boolean has_intel64 = FALSE;
10130
10131       aux_templates.start = t;
10132       while (++t < current_templates->end)
10133         {
10134           if (t->opcode_modifier.jump
10135               != current_templates->start->opcode_modifier.jump)
10136             break;
10137           if ((t->opcode_modifier.isa64 >= INTEL64))
10138             has_intel64 = TRUE;
10139         }
10140       if (t < current_templates->end)
10141         {
10142           aux_templates.end = t;
10143           current_templates = &aux_templates;
10144         }
10145
10146       override = (i.prefix[DATA_PREFIX] != 0);
10147       if (flag_code == CODE_64BIT)
10148         {
10149           if ((override || i.suffix == WORD_MNEM_SUFFIX)
10150               && (!intel64 || !has_intel64))
10151             bigdisp.bitfield.disp16 = 1;
10152           else
10153             bigdisp.bitfield.disp32s = 1;
10154         }
10155       else
10156         {
10157           if (!override)
10158             override = (i.suffix == (flag_code != CODE_16BIT
10159                                      ? WORD_MNEM_SUFFIX
10160                                      : LONG_MNEM_SUFFIX));
10161           bigdisp.bitfield.disp32 = 1;
10162           if ((flag_code == CODE_16BIT) ^ override)
10163             {
10164               bigdisp.bitfield.disp32 = 0;
10165               bigdisp.bitfield.disp16 = 1;
10166             }
10167         }
10168     }
10169   i.types[this_operand] = operand_type_or (i.types[this_operand],
10170                                            bigdisp);
10171
10172   exp = &disp_expressions[i.disp_operands];
10173   i.op[this_operand].disps = exp;
10174   i.disp_operands++;
10175   save_input_line_pointer = input_line_pointer;
10176   input_line_pointer = disp_start;
10177   END_STRING_AND_SAVE (disp_end);
10178
10179 #ifndef GCC_ASM_O_HACK
10180 #define GCC_ASM_O_HACK 0
10181 #endif
10182 #if GCC_ASM_O_HACK
10183   END_STRING_AND_SAVE (disp_end + 1);
10184   if (i.types[this_operand].bitfield.baseIndex
10185       && displacement_string_end[-1] == '+')
10186     {
10187       /* This hack is to avoid a warning when using the "o"
10188          constraint within gcc asm statements.
10189          For instance:
10190
10191          #define _set_tssldt_desc(n,addr,limit,type) \
10192          __asm__ __volatile__ ( \
10193          "movw %w2,%0\n\t" \
10194          "movw %w1,2+%0\n\t" \
10195          "rorl $16,%1\n\t" \
10196          "movb %b1,4+%0\n\t" \
10197          "movb %4,5+%0\n\t" \
10198          "movb $0,6+%0\n\t" \
10199          "movb %h1,7+%0\n\t" \
10200          "rorl $16,%1" \
10201          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
10202
10203          This works great except that the output assembler ends
10204          up looking a bit weird if it turns out that there is
10205          no offset.  You end up producing code that looks like:
10206
10207          #APP
10208          movw $235,(%eax)
10209          movw %dx,2+(%eax)
10210          rorl $16,%edx
10211          movb %dl,4+(%eax)
10212          movb $137,5+(%eax)
10213          movb $0,6+(%eax)
10214          movb %dh,7+(%eax)
10215          rorl $16,%edx
10216          #NO_APP
10217
10218          So here we provide the missing zero.  */
10219
10220       *displacement_string_end = '0';
10221     }
10222 #endif
10223   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10224   if (gotfree_input_line)
10225     input_line_pointer = gotfree_input_line;
10226
10227   exp_seg = expression (exp);
10228
10229   SKIP_WHITESPACE ();
10230   if (*input_line_pointer)
10231     as_bad (_("junk `%s' after expression"), input_line_pointer);
10232 #if GCC_ASM_O_HACK
10233   RESTORE_END_STRING (disp_end + 1);
10234 #endif
10235   input_line_pointer = save_input_line_pointer;
10236   if (gotfree_input_line)
10237     {
10238       free (gotfree_input_line);
10239
10240       if (exp->X_op == O_constant || exp->X_op == O_register)
10241         exp->X_op = O_illegal;
10242     }
10243
10244   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
10245
10246   RESTORE_END_STRING (disp_end);
10247
10248   return ret;
10249 }
10250
10251 static int
10252 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10253                             i386_operand_type types, const char *disp_start)
10254 {
10255   i386_operand_type bigdisp;
10256   int ret = 1;
10257
10258   /* We do this to make sure that the section symbol is in
10259      the symbol table.  We will ultimately change the relocation
10260      to be relative to the beginning of the section.  */
10261   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10262       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10263       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10264     {
10265       if (exp->X_op != O_symbol)
10266         goto inv_disp;
10267
10268       if (S_IS_LOCAL (exp->X_add_symbol)
10269           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
10270           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
10271         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
10272       exp->X_op = O_subtract;
10273       exp->X_op_symbol = GOT_symbol;
10274       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
10275         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
10276       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10277         i.reloc[this_operand] = BFD_RELOC_64;
10278       else
10279         i.reloc[this_operand] = BFD_RELOC_32;
10280     }
10281
10282   else if (exp->X_op == O_absent
10283            || exp->X_op == O_illegal
10284            || exp->X_op == O_big)
10285     {
10286     inv_disp:
10287       as_bad (_("missing or invalid displacement expression `%s'"),
10288               disp_start);
10289       ret = 0;
10290     }
10291
10292   else if (flag_code == CODE_64BIT
10293            && !i.prefix[ADDR_PREFIX]
10294            && exp->X_op == O_constant)
10295     {
10296       /* Since displacement is signed extended to 64bit, don't allow
10297          disp32 and turn off disp32s if they are out of range.  */
10298       i.types[this_operand].bitfield.disp32 = 0;
10299       if (!fits_in_signed_long (exp->X_add_number))
10300         {
10301           i.types[this_operand].bitfield.disp32s = 0;
10302           if (i.types[this_operand].bitfield.baseindex)
10303             {
10304               as_bad (_("0x%lx out range of signed 32bit displacement"),
10305                       (long) exp->X_add_number);
10306               ret = 0;
10307             }
10308         }
10309     }
10310
10311 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10312   else if (exp->X_op != O_constant
10313            && OUTPUT_FLAVOR == bfd_target_aout_flavour
10314            && exp_seg != absolute_section
10315            && exp_seg != text_section
10316            && exp_seg != data_section
10317            && exp_seg != bss_section
10318            && exp_seg != undefined_section
10319            && !bfd_is_com_section (exp_seg))
10320     {
10321       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10322       ret = 0;
10323     }
10324 #endif
10325
10326   if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
10327       /* Constants get taken care of by optimize_disp().  */
10328       && exp->X_op != O_constant)
10329     i.types[this_operand].bitfield.disp8 = 1;
10330
10331   /* Check if this is a displacement only operand.  */
10332   bigdisp = i.types[this_operand];
10333   bigdisp.bitfield.disp8 = 0;
10334   bigdisp.bitfield.disp16 = 0;
10335   bigdisp.bitfield.disp32 = 0;
10336   bigdisp.bitfield.disp32s = 0;
10337   bigdisp.bitfield.disp64 = 0;
10338   if (operand_type_all_zero (&bigdisp))
10339     i.types[this_operand] = operand_type_and (i.types[this_operand],
10340                                               types);
10341
10342   return ret;
10343 }
10344
10345 /* Return the active addressing mode, taking address override and
10346    registers forming the address into consideration.  Update the
10347    address override prefix if necessary.  */
10348
10349 static enum flag_code
10350 i386_addressing_mode (void)
10351 {
10352   enum flag_code addr_mode;
10353
10354   if (i.prefix[ADDR_PREFIX])
10355     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
10356   else if (flag_code == CODE_16BIT
10357            && current_templates->start->cpu_flags.bitfield.cpumpx
10358            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
10359               from md_assemble() by "is not a valid base/index expression"
10360               when there is a base and/or index.  */
10361            && !i.types[this_operand].bitfield.baseindex)
10362     {
10363       /* MPX insn memory operands with neither base nor index must be forced
10364          to use 32-bit addressing in 16-bit mode.  */
10365       addr_mode = CODE_32BIT;
10366       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10367       ++i.prefixes;
10368       gas_assert (!i.types[this_operand].bitfield.disp16);
10369       gas_assert (!i.types[this_operand].bitfield.disp32);
10370     }
10371   else
10372     {
10373       addr_mode = flag_code;
10374
10375 #if INFER_ADDR_PREFIX
10376       if (i.mem_operands == 0)
10377         {
10378           /* Infer address prefix from the first memory operand.  */
10379           const reg_entry *addr_reg = i.base_reg;
10380
10381           if (addr_reg == NULL)
10382             addr_reg = i.index_reg;
10383
10384           if (addr_reg)
10385             {
10386               if (addr_reg->reg_type.bitfield.dword)
10387                 addr_mode = CODE_32BIT;
10388               else if (flag_code != CODE_64BIT
10389                        && addr_reg->reg_type.bitfield.word)
10390                 addr_mode = CODE_16BIT;
10391
10392               if (addr_mode != flag_code)
10393                 {
10394                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10395                   i.prefixes += 1;
10396                   /* Change the size of any displacement too.  At most one
10397                      of Disp16 or Disp32 is set.
10398                      FIXME.  There doesn't seem to be any real need for
10399                      separate Disp16 and Disp32 flags.  The same goes for
10400                      Imm16 and Imm32.  Removing them would probably clean
10401                      up the code quite a lot.  */
10402                   if (flag_code != CODE_64BIT
10403                       && (i.types[this_operand].bitfield.disp16
10404                           || i.types[this_operand].bitfield.disp32))
10405                     i.types[this_operand]
10406                       = operand_type_xor (i.types[this_operand], disp16_32);
10407                 }
10408             }
10409         }
10410 #endif
10411     }
10412
10413   return addr_mode;
10414 }
10415
10416 /* Make sure the memory operand we've been dealt is valid.
10417    Return 1 on success, 0 on a failure.  */
10418
10419 static int
10420 i386_index_check (const char *operand_string)
10421 {
10422   const char *kind = "base/index";
10423   enum flag_code addr_mode = i386_addressing_mode ();
10424
10425   if (current_templates->start->opcode_modifier.isstring
10426       && !current_templates->start->cpu_flags.bitfield.cpupadlock
10427       && (current_templates->end[-1].opcode_modifier.isstring
10428           || i.mem_operands))
10429     {
10430       /* Memory operands of string insns are special in that they only allow
10431          a single register (rDI, rSI, or rBX) as their memory address.  */
10432       const reg_entry *expected_reg;
10433       static const char *di_si[][2] =
10434         {
10435           { "esi", "edi" },
10436           { "si", "di" },
10437           { "rsi", "rdi" }
10438         };
10439       static const char *bx[] = { "ebx", "bx", "rbx" };
10440
10441       kind = "string address";
10442
10443       if (current_templates->start->opcode_modifier.repprefixok)
10444         {
10445           int es_op = current_templates->end[-1].opcode_modifier.isstring
10446                       - IS_STRING_ES_OP0;
10447           int op = 0;
10448
10449           if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
10450               || ((!i.mem_operands != !intel_syntax)
10451                   && current_templates->end[-1].operand_types[1]
10452                      .bitfield.baseindex))
10453             op = 1;
10454           expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]);
10455         }
10456       else
10457         expected_reg = hash_find (reg_hash, bx[addr_mode]);
10458
10459       if (i.base_reg != expected_reg
10460           || i.index_reg
10461           || operand_type_check (i.types[this_operand], disp))
10462         {
10463           /* The second memory operand must have the same size as
10464              the first one.  */
10465           if (i.mem_operands
10466               && i.base_reg
10467               && !((addr_mode == CODE_64BIT
10468                     && i.base_reg->reg_type.bitfield.qword)
10469                    || (addr_mode == CODE_32BIT
10470                        ? i.base_reg->reg_type.bitfield.dword
10471                        : i.base_reg->reg_type.bitfield.word)))
10472             goto bad_address;
10473
10474           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
10475                    operand_string,
10476                    intel_syntax ? '[' : '(',
10477                    register_prefix,
10478                    expected_reg->reg_name,
10479                    intel_syntax ? ']' : ')');
10480           return 1;
10481         }
10482       else
10483         return 1;
10484
10485     bad_address:
10486       as_bad (_("`%s' is not a valid %s expression"),
10487               operand_string, kind);
10488       return 0;
10489     }
10490   else
10491     {
10492       if (addr_mode != CODE_16BIT)
10493         {
10494           /* 32-bit/64-bit checks.  */
10495           if ((i.base_reg
10496                && ((addr_mode == CODE_64BIT
10497                     ? !i.base_reg->reg_type.bitfield.qword
10498                     : !i.base_reg->reg_type.bitfield.dword)
10499                    || (i.index_reg && i.base_reg->reg_num == RegIP)
10500                    || i.base_reg->reg_num == RegIZ))
10501               || (i.index_reg
10502                   && !i.index_reg->reg_type.bitfield.xmmword
10503                   && !i.index_reg->reg_type.bitfield.ymmword
10504                   && !i.index_reg->reg_type.bitfield.zmmword
10505                   && ((addr_mode == CODE_64BIT
10506                        ? !i.index_reg->reg_type.bitfield.qword
10507                        : !i.index_reg->reg_type.bitfield.dword)
10508                       || !i.index_reg->reg_type.bitfield.baseindex)))
10509             goto bad_address;
10510
10511           /* bndmk, bndldx, and bndstx have special restrictions. */
10512           if (current_templates->start->base_opcode == 0xf30f1b
10513               || (current_templates->start->base_opcode & ~1) == 0x0f1a)
10514             {
10515               /* They cannot use RIP-relative addressing. */
10516               if (i.base_reg && i.base_reg->reg_num == RegIP)
10517                 {
10518                   as_bad (_("`%s' cannot be used here"), operand_string);
10519                   return 0;
10520                 }
10521
10522               /* bndldx and bndstx ignore their scale factor. */
10523               if (current_templates->start->base_opcode != 0xf30f1b
10524                   && i.log2_scale_factor)
10525                 as_warn (_("register scaling is being ignored here"));
10526             }
10527         }
10528       else
10529         {
10530           /* 16-bit checks.  */
10531           if ((i.base_reg
10532                && (!i.base_reg->reg_type.bitfield.word
10533                    || !i.base_reg->reg_type.bitfield.baseindex))
10534               || (i.index_reg
10535                   && (!i.index_reg->reg_type.bitfield.word
10536                       || !i.index_reg->reg_type.bitfield.baseindex
10537                       || !(i.base_reg
10538                            && i.base_reg->reg_num < 6
10539                            && i.index_reg->reg_num >= 6
10540                            && i.log2_scale_factor == 0))))
10541             goto bad_address;
10542         }
10543     }
10544   return 1;
10545 }
10546
10547 /* Handle vector immediates.  */
10548
10549 static int
10550 RC_SAE_immediate (const char *imm_start)
10551 {
10552   unsigned int match_found, j;
10553   const char *pstr = imm_start;
10554   expressionS *exp;
10555
10556   if (*pstr != '{')
10557     return 0;
10558
10559   pstr++;
10560   match_found = 0;
10561   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10562     {
10563       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10564         {
10565           if (!i.rounding)
10566             {
10567               rc_op.type = RC_NamesTable[j].type;
10568               rc_op.operand = this_operand;
10569               i.rounding = &rc_op;
10570             }
10571           else
10572             {
10573               as_bad (_("duplicated `%s'"), imm_start);
10574               return 0;
10575             }
10576           pstr += RC_NamesTable[j].len;
10577           match_found = 1;
10578           break;
10579         }
10580     }
10581   if (!match_found)
10582     return 0;
10583
10584   if (*pstr++ != '}')
10585     {
10586       as_bad (_("Missing '}': '%s'"), imm_start);
10587       return 0;
10588     }
10589   /* RC/SAE immediate string should contain nothing more.  */;
10590   if (*pstr != 0)
10591     {
10592       as_bad (_("Junk after '}': '%s'"), imm_start);
10593       return 0;
10594     }
10595
10596   exp = &im_expressions[i.imm_operands++];
10597   i.op[this_operand].imms = exp;
10598
10599   exp->X_op = O_constant;
10600   exp->X_add_number = 0;
10601   exp->X_add_symbol = (symbolS *) 0;
10602   exp->X_op_symbol = (symbolS *) 0;
10603
10604   i.types[this_operand].bitfield.imm8 = 1;
10605   return 1;
10606 }
10607
10608 /* Only string instructions can have a second memory operand, so
10609    reduce current_templates to just those if it contains any.  */
10610 static int
10611 maybe_adjust_templates (void)
10612 {
10613   const insn_template *t;
10614
10615   gas_assert (i.mem_operands == 1);
10616
10617   for (t = current_templates->start; t < current_templates->end; ++t)
10618     if (t->opcode_modifier.isstring)
10619       break;
10620
10621   if (t < current_templates->end)
10622     {
10623       static templates aux_templates;
10624       bfd_boolean recheck;
10625
10626       aux_templates.start = t;
10627       for (; t < current_templates->end; ++t)
10628         if (!t->opcode_modifier.isstring)
10629           break;
10630       aux_templates.end = t;
10631
10632       /* Determine whether to re-check the first memory operand.  */
10633       recheck = (aux_templates.start != current_templates->start
10634                  || t != current_templates->end);
10635
10636       current_templates = &aux_templates;
10637
10638       if (recheck)
10639         {
10640           i.mem_operands = 0;
10641           if (i.memop1_string != NULL
10642               && i386_index_check (i.memop1_string) == 0)
10643             return 0;
10644           i.mem_operands = 1;
10645         }
10646     }
10647
10648   return 1;
10649 }
10650
10651 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
10652    on error.  */
10653
10654 static int
10655 i386_att_operand (char *operand_string)
10656 {
10657   const reg_entry *r;
10658   char *end_op;
10659   char *op_string = operand_string;
10660
10661   if (is_space_char (*op_string))
10662     ++op_string;
10663
10664   /* We check for an absolute prefix (differentiating,
10665      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
10666   if (*op_string == ABSOLUTE_PREFIX)
10667     {
10668       ++op_string;
10669       if (is_space_char (*op_string))
10670         ++op_string;
10671       i.jumpabsolute = TRUE;
10672     }
10673
10674   /* Check if operand is a register.  */
10675   if ((r = parse_register (op_string, &end_op)) != NULL)
10676     {
10677       i386_operand_type temp;
10678
10679       /* Check for a segment override by searching for ':' after a
10680          segment register.  */
10681       op_string = end_op;
10682       if (is_space_char (*op_string))
10683         ++op_string;
10684       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
10685         {
10686           switch (r->reg_num)
10687             {
10688             case 0:
10689               i.seg[i.mem_operands] = &es;
10690               break;
10691             case 1:
10692               i.seg[i.mem_operands] = &cs;
10693               break;
10694             case 2:
10695               i.seg[i.mem_operands] = &ss;
10696               break;
10697             case 3:
10698               i.seg[i.mem_operands] = &ds;
10699               break;
10700             case 4:
10701               i.seg[i.mem_operands] = &fs;
10702               break;
10703             case 5:
10704               i.seg[i.mem_operands] = &gs;
10705               break;
10706             }
10707
10708           /* Skip the ':' and whitespace.  */
10709           ++op_string;
10710           if (is_space_char (*op_string))
10711             ++op_string;
10712
10713           if (!is_digit_char (*op_string)
10714               && !is_identifier_char (*op_string)
10715               && *op_string != '('
10716               && *op_string != ABSOLUTE_PREFIX)
10717             {
10718               as_bad (_("bad memory operand `%s'"), op_string);
10719               return 0;
10720             }
10721           /* Handle case of %es:*foo.  */
10722           if (*op_string == ABSOLUTE_PREFIX)
10723             {
10724               ++op_string;
10725               if (is_space_char (*op_string))
10726                 ++op_string;
10727               i.jumpabsolute = TRUE;
10728             }
10729           goto do_memory_reference;
10730         }
10731
10732       /* Handle vector operations.  */
10733       if (*op_string == '{')
10734         {
10735           op_string = check_VecOperations (op_string, NULL);
10736           if (op_string == NULL)
10737             return 0;
10738         }
10739
10740       if (*op_string)
10741         {
10742           as_bad (_("junk `%s' after register"), op_string);
10743           return 0;
10744         }
10745       temp = r->reg_type;
10746       temp.bitfield.baseindex = 0;
10747       i.types[this_operand] = operand_type_or (i.types[this_operand],
10748                                                temp);
10749       i.types[this_operand].bitfield.unspecified = 0;
10750       i.op[this_operand].regs = r;
10751       i.reg_operands++;
10752     }
10753   else if (*op_string == REGISTER_PREFIX)
10754     {
10755       as_bad (_("bad register name `%s'"), op_string);
10756       return 0;
10757     }
10758   else if (*op_string == IMMEDIATE_PREFIX)
10759     {
10760       ++op_string;
10761       if (i.jumpabsolute)
10762         {
10763           as_bad (_("immediate operand illegal with absolute jump"));
10764           return 0;
10765         }
10766       if (!i386_immediate (op_string))
10767         return 0;
10768     }
10769   else if (RC_SAE_immediate (operand_string))
10770     {
10771       /* If it is a RC or SAE immediate, do nothing.  */
10772       ;
10773     }
10774   else if (is_digit_char (*op_string)
10775            || is_identifier_char (*op_string)
10776            || *op_string == '"'
10777            || *op_string == '(')
10778     {
10779       /* This is a memory reference of some sort.  */
10780       char *base_string;
10781
10782       /* Start and end of displacement string expression (if found).  */
10783       char *displacement_string_start;
10784       char *displacement_string_end;
10785       char *vop_start;
10786
10787     do_memory_reference:
10788       if (i.mem_operands == 1 && !maybe_adjust_templates ())
10789         return 0;
10790       if ((i.mem_operands == 1
10791            && !current_templates->start->opcode_modifier.isstring)
10792           || i.mem_operands == 2)
10793         {
10794           as_bad (_("too many memory references for `%s'"),
10795                   current_templates->start->name);
10796           return 0;
10797         }
10798
10799       /* Check for base index form.  We detect the base index form by
10800          looking for an ')' at the end of the operand, searching
10801          for the '(' matching it, and finding a REGISTER_PREFIX or ','
10802          after the '('.  */
10803       base_string = op_string + strlen (op_string);
10804
10805       /* Handle vector operations.  */
10806       vop_start = strchr (op_string, '{');
10807       if (vop_start && vop_start < base_string)
10808         {
10809           if (check_VecOperations (vop_start, base_string) == NULL)
10810             return 0;
10811           base_string = vop_start;
10812         }
10813
10814       --base_string;
10815       if (is_space_char (*base_string))
10816         --base_string;
10817
10818       /* If we only have a displacement, set-up for it to be parsed later.  */
10819       displacement_string_start = op_string;
10820       displacement_string_end = base_string + 1;
10821
10822       if (*base_string == ')')
10823         {
10824           char *temp_string;
10825           unsigned int parens_balanced = 1;
10826           /* We've already checked that the number of left & right ()'s are
10827              equal, so this loop will not be infinite.  */
10828           do
10829             {
10830               base_string--;
10831               if (*base_string == ')')
10832                 parens_balanced++;
10833               if (*base_string == '(')
10834                 parens_balanced--;
10835             }
10836           while (parens_balanced);
10837
10838           temp_string = base_string;
10839
10840           /* Skip past '(' and whitespace.  */
10841           ++base_string;
10842           if (is_space_char (*base_string))
10843             ++base_string;
10844
10845           if (*base_string == ','
10846               || ((i.base_reg = parse_register (base_string, &end_op))
10847                   != NULL))
10848             {
10849               displacement_string_end = temp_string;
10850
10851               i.types[this_operand].bitfield.baseindex = 1;
10852
10853               if (i.base_reg)
10854                 {
10855                   base_string = end_op;
10856                   if (is_space_char (*base_string))
10857                     ++base_string;
10858                 }
10859
10860               /* There may be an index reg or scale factor here.  */
10861               if (*base_string == ',')
10862                 {
10863                   ++base_string;
10864                   if (is_space_char (*base_string))
10865                     ++base_string;
10866
10867                   if ((i.index_reg = parse_register (base_string, &end_op))
10868                       != NULL)
10869                     {
10870                       base_string = end_op;
10871                       if (is_space_char (*base_string))
10872                         ++base_string;
10873                       if (*base_string == ',')
10874                         {
10875                           ++base_string;
10876                           if (is_space_char (*base_string))
10877                             ++base_string;
10878                         }
10879                       else if (*base_string != ')')
10880                         {
10881                           as_bad (_("expecting `,' or `)' "
10882                                     "after index register in `%s'"),
10883                                   operand_string);
10884                           return 0;
10885                         }
10886                     }
10887                   else if (*base_string == REGISTER_PREFIX)
10888                     {
10889                       end_op = strchr (base_string, ',');
10890                       if (end_op)
10891                         *end_op = '\0';
10892                       as_bad (_("bad register name `%s'"), base_string);
10893                       return 0;
10894                     }
10895
10896                   /* Check for scale factor.  */
10897                   if (*base_string != ')')
10898                     {
10899                       char *end_scale = i386_scale (base_string);
10900
10901                       if (!end_scale)
10902                         return 0;
10903
10904                       base_string = end_scale;
10905                       if (is_space_char (*base_string))
10906                         ++base_string;
10907                       if (*base_string != ')')
10908                         {
10909                           as_bad (_("expecting `)' "
10910                                     "after scale factor in `%s'"),
10911                                   operand_string);
10912                           return 0;
10913                         }
10914                     }
10915                   else if (!i.index_reg)
10916                     {
10917                       as_bad (_("expecting index register or scale factor "
10918                                 "after `,'; got '%c'"),
10919                               *base_string);
10920                       return 0;
10921                     }
10922                 }
10923               else if (*base_string != ')')
10924                 {
10925                   as_bad (_("expecting `,' or `)' "
10926                             "after base register in `%s'"),
10927                           operand_string);
10928                   return 0;
10929                 }
10930             }
10931           else if (*base_string == REGISTER_PREFIX)
10932             {
10933               end_op = strchr (base_string, ',');
10934               if (end_op)
10935                 *end_op = '\0';
10936               as_bad (_("bad register name `%s'"), base_string);
10937               return 0;
10938             }
10939         }
10940
10941       /* If there's an expression beginning the operand, parse it,
10942          assuming displacement_string_start and
10943          displacement_string_end are meaningful.  */
10944       if (displacement_string_start != displacement_string_end)
10945         {
10946           if (!i386_displacement (displacement_string_start,
10947                                   displacement_string_end))
10948             return 0;
10949         }
10950
10951       /* Special case for (%dx) while doing input/output op.  */
10952       if (i.base_reg
10953           && i.base_reg->reg_type.bitfield.instance == RegD
10954           && i.base_reg->reg_type.bitfield.word
10955           && i.index_reg == 0
10956           && i.log2_scale_factor == 0
10957           && i.seg[i.mem_operands] == 0
10958           && !operand_type_check (i.types[this_operand], disp))
10959         {
10960           i.types[this_operand] = i.base_reg->reg_type;
10961           return 1;
10962         }
10963
10964       if (i386_index_check (operand_string) == 0)
10965         return 0;
10966       i.flags[this_operand] |= Operand_Mem;
10967       if (i.mem_operands == 0)
10968         i.memop1_string = xstrdup (operand_string);
10969       i.mem_operands++;
10970     }
10971   else
10972     {
10973       /* It's not a memory operand; argh!  */
10974       as_bad (_("invalid char %s beginning operand %d `%s'"),
10975               output_invalid (*op_string),
10976               this_operand + 1,
10977               op_string);
10978       return 0;
10979     }
10980   return 1;                     /* Normal return.  */
10981 }
10982 \f
10983 /* Calculate the maximum variable size (i.e., excluding fr_fix)
10984    that an rs_machine_dependent frag may reach.  */
10985
10986 unsigned int
10987 i386_frag_max_var (fragS *frag)
10988 {
10989   /* The only relaxable frags are for jumps.
10990      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
10991   gas_assert (frag->fr_type == rs_machine_dependent);
10992   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
10993 }
10994
10995 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10996 static int
10997 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
10998 {
10999   /* STT_GNU_IFUNC symbol must go through PLT.  */
11000   if ((symbol_get_bfdsym (fr_symbol)->flags
11001        & BSF_GNU_INDIRECT_FUNCTION) != 0)
11002     return 0;
11003
11004   if (!S_IS_EXTERNAL (fr_symbol))
11005     /* Symbol may be weak or local.  */
11006     return !S_IS_WEAK (fr_symbol);
11007
11008   /* Global symbols with non-default visibility can't be preempted. */
11009   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11010     return 1;
11011
11012   if (fr_var != NO_RELOC)
11013     switch ((enum bfd_reloc_code_real) fr_var)
11014       {
11015       case BFD_RELOC_386_PLT32:
11016       case BFD_RELOC_X86_64_PLT32:
11017         /* Symbol with PLT relocation may be preempted. */
11018         return 0;
11019       default:
11020         abort ();
11021       }
11022
11023   /* Global symbols with default visibility in a shared library may be
11024      preempted by another definition.  */
11025   return !shared;
11026 }
11027 #endif
11028
11029 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11030    Note also work for Skylake and Cascadelake.
11031 ---------------------------------------------------------------------
11032 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
11033 | ------  | ----------- | ------- | -------- |
11034 |   Jo    |      N      |    N    |     Y    |
11035 |   Jno   |      N      |    N    |     Y    |
11036 |  Jc/Jb  |      Y      |    N    |     Y    |
11037 | Jae/Jnb |      Y      |    N    |     Y    |
11038 |  Je/Jz  |      Y      |    Y    |     Y    |
11039 | Jne/Jnz |      Y      |    Y    |     Y    |
11040 | Jna/Jbe |      Y      |    N    |     Y    |
11041 | Ja/Jnbe |      Y      |    N    |     Y    |
11042 |   Js    |      N      |    N    |     Y    |
11043 |   Jns   |      N      |    N    |     Y    |
11044 |  Jp/Jpe |      N      |    N    |     Y    |
11045 | Jnp/Jpo |      N      |    N    |     Y    |
11046 | Jl/Jnge |      Y      |    Y    |     Y    |
11047 | Jge/Jnl |      Y      |    Y    |     Y    |
11048 | Jle/Jng |      Y      |    Y    |     Y    |
11049 | Jg/Jnle |      Y      |    Y    |     Y    |
11050 ---------------------------------------------------------------------  */
11051 static int
11052 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11053 {
11054   if (mf_cmp == mf_cmp_alu_cmp)
11055     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11056             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11057   if (mf_cmp == mf_cmp_incdec)
11058     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11059             || mf_jcc == mf_jcc_jle);
11060   if (mf_cmp == mf_cmp_test_and)
11061     return 1;
11062   return 0;
11063 }
11064
11065 /* Return the next non-empty frag.  */
11066
11067 static fragS *
11068 i386_next_non_empty_frag (fragS *fragP)
11069 {
11070   /* There may be a frag with a ".fill 0" when there is no room in
11071      the current frag for frag_grow in output_insn.  */
11072   for (fragP = fragP->fr_next;
11073        (fragP != NULL
11074         && fragP->fr_type == rs_fill
11075         && fragP->fr_fix == 0);
11076        fragP = fragP->fr_next)
11077     ;
11078   return fragP;
11079 }
11080
11081 /* Return the next jcc frag after BRANCH_PADDING.  */
11082
11083 static fragS *
11084 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11085 {
11086   fragS *branch_fragP;
11087   if (!pad_fragP)
11088     return NULL;
11089
11090   if (pad_fragP->fr_type == rs_machine_dependent
11091       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11092           == BRANCH_PADDING))
11093     {
11094       branch_fragP = i386_next_non_empty_frag (pad_fragP);
11095       if (branch_fragP->fr_type != rs_machine_dependent)
11096         return NULL;
11097       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11098           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11099                                    pad_fragP->tc_frag_data.mf_type))
11100         return branch_fragP;
11101     }
11102
11103   return NULL;
11104 }
11105
11106 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
11107
11108 static void
11109 i386_classify_machine_dependent_frag (fragS *fragP)
11110 {
11111   fragS *cmp_fragP;
11112   fragS *pad_fragP;
11113   fragS *branch_fragP;
11114   fragS *next_fragP;
11115   unsigned int max_prefix_length;
11116
11117   if (fragP->tc_frag_data.classified)
11118     return;
11119
11120   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
11121      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
11122   for (next_fragP = fragP;
11123        next_fragP != NULL;
11124        next_fragP = next_fragP->fr_next)
11125     {
11126       next_fragP->tc_frag_data.classified = 1;
11127       if (next_fragP->fr_type == rs_machine_dependent)
11128         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11129           {
11130           case BRANCH_PADDING:
11131             /* The BRANCH_PADDING frag must be followed by a branch
11132                frag.  */
11133             branch_fragP = i386_next_non_empty_frag (next_fragP);
11134             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11135             break;
11136           case FUSED_JCC_PADDING:
11137             /* Check if this is a fused jcc:
11138                FUSED_JCC_PADDING
11139                CMP like instruction
11140                BRANCH_PADDING
11141                COND_JUMP
11142                */
11143             cmp_fragP = i386_next_non_empty_frag (next_fragP);
11144             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11145             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
11146             if (branch_fragP)
11147               {
11148                 /* The BRANCH_PADDING frag is merged with the
11149                    FUSED_JCC_PADDING frag.  */
11150                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11151                 /* CMP like instruction size.  */
11152                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
11153                 frag_wane (pad_fragP);
11154                 /* Skip to branch_fragP.  */
11155                 next_fragP = branch_fragP;
11156               }
11157             else if (next_fragP->tc_frag_data.max_prefix_length)
11158               {
11159                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
11160                    a fused jcc.  */
11161                 next_fragP->fr_subtype
11162                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
11163                 next_fragP->tc_frag_data.max_bytes
11164                   = next_fragP->tc_frag_data.max_prefix_length;
11165                 /* This will be updated in the BRANCH_PREFIX scan.  */
11166                 next_fragP->tc_frag_data.max_prefix_length = 0;
11167               }
11168             else
11169               frag_wane (next_fragP);
11170             break;
11171           }
11172     }
11173
11174   /* Stop if there is no BRANCH_PREFIX.  */
11175   if (!align_branch_prefix_size)
11176     return;
11177
11178   /* Scan for BRANCH_PREFIX.  */
11179   for (; fragP != NULL; fragP = fragP->fr_next)
11180     {
11181       if (fragP->fr_type != rs_machine_dependent
11182           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11183               != BRANCH_PREFIX))
11184         continue;
11185
11186       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
11187          COND_JUMP_PREFIX.  */
11188       max_prefix_length = 0;
11189       for (next_fragP = fragP;
11190            next_fragP != NULL;
11191            next_fragP = next_fragP->fr_next)
11192         {
11193           if (next_fragP->fr_type == rs_fill)
11194             /* Skip rs_fill frags.  */
11195             continue;
11196           else if (next_fragP->fr_type != rs_machine_dependent)
11197             /* Stop for all other frags.  */
11198             break;
11199
11200           /* rs_machine_dependent frags.  */
11201           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11202               == BRANCH_PREFIX)
11203             {
11204               /* Count BRANCH_PREFIX frags.  */
11205               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
11206                 {
11207                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
11208                   frag_wane (next_fragP);
11209                 }
11210               else
11211                 max_prefix_length
11212                   += next_fragP->tc_frag_data.max_bytes;
11213             }
11214           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11215                     == BRANCH_PADDING)
11216                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11217                        == FUSED_JCC_PADDING))
11218             {
11219               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
11220               fragP->tc_frag_data.u.padding_fragP = next_fragP;
11221               break;
11222             }
11223           else
11224             /* Stop for other rs_machine_dependent frags.  */
11225             break;
11226         }
11227
11228       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11229
11230       /* Skip to the next frag.  */
11231       fragP = next_fragP;
11232     }
11233 }
11234
11235 /* Compute padding size for
11236
11237         FUSED_JCC_PADDING
11238         CMP like instruction
11239         BRANCH_PADDING
11240         COND_JUMP/UNCOND_JUMP
11241
11242    or
11243
11244         BRANCH_PADDING
11245         COND_JUMP/UNCOND_JUMP
11246  */
11247
11248 static int
11249 i386_branch_padding_size (fragS *fragP, offsetT address)
11250 {
11251   unsigned int offset, size, padding_size;
11252   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11253
11254   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
11255   if (!address)
11256     address = fragP->fr_address;
11257   address += fragP->fr_fix;
11258
11259   /* CMP like instrunction size.  */
11260   size = fragP->tc_frag_data.cmp_size;
11261
11262   /* The base size of the branch frag.  */
11263   size += branch_fragP->fr_fix;
11264
11265   /* Add opcode and displacement bytes for the rs_machine_dependent
11266      branch frag.  */
11267   if (branch_fragP->fr_type == rs_machine_dependent)
11268     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
11269
11270   /* Check if branch is within boundary and doesn't end at the last
11271      byte.  */
11272   offset = address & ((1U << align_branch_power) - 1);
11273   if ((offset + size) >= (1U << align_branch_power))
11274     /* Padding needed to avoid crossing boundary.  */
11275     padding_size = (1U << align_branch_power) - offset;
11276   else
11277     /* No padding needed.  */
11278     padding_size = 0;
11279
11280   /* The return value may be saved in tc_frag_data.length which is
11281      unsigned byte.  */
11282   if (!fits_in_unsigned_byte (padding_size))
11283     abort ();
11284
11285   return padding_size;
11286 }
11287
11288 /* i386_generic_table_relax_frag()
11289
11290    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
11291    grow/shrink padding to align branch frags.  Hand others to
11292    relax_frag().  */
11293
11294 long
11295 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
11296 {
11297   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11298       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11299     {
11300       long padding_size = i386_branch_padding_size (fragP, 0);
11301       long grow = padding_size - fragP->tc_frag_data.length;
11302
11303       /* When the BRANCH_PREFIX frag is used, the computed address
11304          must match the actual address and there should be no padding.  */
11305       if (fragP->tc_frag_data.padding_address
11306           && (fragP->tc_frag_data.padding_address != fragP->fr_address
11307               || padding_size))
11308         abort ();
11309
11310       /* Update the padding size.  */
11311       if (grow)
11312         fragP->tc_frag_data.length = padding_size;
11313
11314       return grow;
11315     }
11316   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11317     {
11318       fragS *padding_fragP, *next_fragP;
11319       long padding_size, left_size, last_size;
11320
11321       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11322       if (!padding_fragP)
11323         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
11324         return (fragP->tc_frag_data.length
11325                 - fragP->tc_frag_data.last_length);
11326
11327       /* Compute the relative address of the padding frag in the very
11328         first time where the BRANCH_PREFIX frag sizes are zero.  */
11329       if (!fragP->tc_frag_data.padding_address)
11330         fragP->tc_frag_data.padding_address
11331           = padding_fragP->fr_address - (fragP->fr_address - stretch);
11332
11333       /* First update the last length from the previous interation.  */
11334       left_size = fragP->tc_frag_data.prefix_length;
11335       for (next_fragP = fragP;
11336            next_fragP != padding_fragP;
11337            next_fragP = next_fragP->fr_next)
11338         if (next_fragP->fr_type == rs_machine_dependent
11339             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11340                 == BRANCH_PREFIX))
11341           {
11342             if (left_size)
11343               {
11344                 int max = next_fragP->tc_frag_data.max_bytes;
11345                 if (max)
11346                   {
11347                     int size;
11348                     if (max > left_size)
11349                       size = left_size;
11350                     else
11351                       size = max;
11352                     left_size -= size;
11353                     next_fragP->tc_frag_data.last_length = size;
11354                   }
11355               }
11356             else
11357               next_fragP->tc_frag_data.last_length = 0;
11358           }
11359
11360       /* Check the padding size for the padding frag.  */
11361       padding_size = i386_branch_padding_size
11362         (padding_fragP, (fragP->fr_address
11363                          + fragP->tc_frag_data.padding_address));
11364
11365       last_size = fragP->tc_frag_data.prefix_length;
11366       /* Check if there is change from the last interation.  */
11367       if (padding_size == last_size)
11368         {
11369           /* Update the expected address of the padding frag.  */
11370           padding_fragP->tc_frag_data.padding_address
11371             = (fragP->fr_address + padding_size
11372                + fragP->tc_frag_data.padding_address);
11373           return 0;
11374         }
11375
11376       if (padding_size > fragP->tc_frag_data.max_prefix_length)
11377         {
11378           /* No padding if there is no sufficient room.  Clear the
11379              expected address of the padding frag.  */
11380           padding_fragP->tc_frag_data.padding_address = 0;
11381           padding_size = 0;
11382         }
11383       else
11384         /* Store the expected address of the padding frag.  */
11385         padding_fragP->tc_frag_data.padding_address
11386           = (fragP->fr_address + padding_size
11387              + fragP->tc_frag_data.padding_address);
11388
11389       fragP->tc_frag_data.prefix_length = padding_size;
11390
11391       /* Update the length for the current interation.  */
11392       left_size = padding_size;
11393       for (next_fragP = fragP;
11394            next_fragP != padding_fragP;
11395            next_fragP = next_fragP->fr_next)
11396         if (next_fragP->fr_type == rs_machine_dependent
11397             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11398                 == BRANCH_PREFIX))
11399           {
11400             if (left_size)
11401               {
11402                 int max = next_fragP->tc_frag_data.max_bytes;
11403                 if (max)
11404                   {
11405                     int size;
11406                     if (max > left_size)
11407                       size = left_size;
11408                     else
11409                       size = max;
11410                     left_size -= size;
11411                     next_fragP->tc_frag_data.length = size;
11412                   }
11413               }
11414             else
11415               next_fragP->tc_frag_data.length = 0;
11416           }
11417
11418       return (fragP->tc_frag_data.length
11419               - fragP->tc_frag_data.last_length);
11420     }
11421   return relax_frag (segment, fragP, stretch);
11422 }
11423
/* md_estimate_size_before_relax()

   Called just before relax() for rs_machine_dependent frags.  The x86
   assembler uses these frags to handle variable size jump
   instructions.

   Any symbol that is now undefined will not become defined.
   Return the correct fr_subtype in the frag.
   Return the initial "guess for variable size of frag" to caller.
   The guess is actually the growth beyond the fixed part.  Whatever
   we do to grow the fixed or variable part contributes to our
   returned value.

   Three cases are handled:
   - BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags are
     classified and their current tc_frag_data.length is returned.
   - A jump whose target cannot be relaxed within SEGMENT is given a
     fixed-size encoding right away: the opcode bytes are rewritten, a
     fixup is created for the displacement, the frag is waned and the
     number of bytes added to fr_fix is returned.
   - Otherwise the rlx_length of the current relax state is
     returned.  */

int
md_estimate_size_before_relax (fragS *fragP, segT segment)
{
  if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
      || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
      || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
    {
      i386_classify_machine_dependent_frag (fragP);
      return fragP->tc_frag_data.length;
    }

  /* We've already got fragP->fr_subtype right;  all we have to do is
     check for un-relaxable symbols.  On an ELF system, we can't relax
     an externally visible symbol, because it may be overridden by a
     shared library.  */
  if (S_GET_SEGMENT (fragP->fr_symbol) != segment
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      || (IS_ELF
          && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
                                                fragP->fr_var))
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      || (OUTPUT_FLAVOR == bfd_target_coff_flavour
          && S_IS_WEAK (fragP->fr_symbol))
#endif
      )
    {
      /* Symbol is undefined in this segment, or we need to keep a
         reloc so that weak symbols can be overridden.  */
      /* Displacement width in bytes: 2 when CODE16 is set in the
         subtype, 4 otherwise.  */
      int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
      enum bfd_reloc_code_real reloc_type;
      unsigned char *opcode;
      int old_fr_fix;

      /* A value other than NO_RELOC in fr_var is used as the
         relocation type; otherwise one is chosen from the displacement
         width.  */
      if (fragP->fr_var != NO_RELOC)
        reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
      else if (size == 2)
        reloc_type = BFD_RELOC_16_PCREL;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      else if (need_plt32_p (fragP->fr_symbol))
        reloc_type = BFD_RELOC_X86_64_PLT32;
#endif
      else
        reloc_type = BFD_RELOC_32_PCREL;

      /* Remember the current fixed size; the new bytes are appended
         after it and the return value is the difference.  */
      old_fr_fix = fragP->fr_fix;
      opcode = (unsigned char *) fragP->fr_opcode;

      switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
        {
        case UNCOND_JUMP:
          /* Make jmp (0xeb) a (d)word displacement jump.  */
          opcode[0] = 0xe9;
          fragP->fr_fix += size;
          fix_new (fragP, old_fr_fix, size,
                   fragP->fr_symbol,
                   fragP->fr_offset, 1,
                   reloc_type);
          break;

        case COND_JUMP86:
          if (size == 2
              && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
            {
              /* Negate the condition, and branch past an
                 unconditional jump.  */
              opcode[0] ^= 1;
              opcode[1] = 3;
              /* Insert an unconditional jump.  */
              opcode[2] = 0xe9;
              /* We added two extra opcode bytes, and have a two byte
                 offset.  */
              fragP->fr_fix += 2 + 2;
              fix_new (fragP, old_fr_fix + 2, 2,
                       fragP->fr_symbol,
                       fragP->fr_offset, 1,
                       reloc_type);
              break;
            }
          /* Fall through.  */

        case COND_JUMP:
          if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
            {
              fixS *fixP;

              /* Promotion is disabled: keep the one-byte displacement
                 and emit a signed 8-bit pc-relative fixup for it.  */
              fragP->fr_fix += 1;
              fixP = fix_new (fragP, old_fr_fix, 1,
                              fragP->fr_symbol,
                              fragP->fr_offset, 1,
                              BFD_RELOC_8_PCREL);
              fixP->fx_signed = 1;
              break;
            }

          /* This changes the byte-displacement jump 0x7N
             to the (d)word-displacement jump 0x0f,0x8N.  */
          opcode[1] = opcode[0] + 0x10;
          opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
          /* We've added an opcode byte.  */
          fragP->fr_fix += 1 + size;
          fix_new (fragP, old_fr_fix + 1, size,
                   fragP->fr_symbol,
                   fragP->fr_offset, 1,
                   reloc_type);
          break;

        default:
          BAD_CASE (fragP->fr_subtype);
          break;
        }
      /* The frag now has a fixed size, so take it out of relaxation
         and report how much the fixed part grew.  */
      frag_wane (fragP);
      return fragP->fr_fix - old_fr_fix;
    }

  /* Guess size depending on current relax state.  Initially the relax
     state will correspond to a short jump and we return 1, because
     the variable part of the frag (the branch offset) is one byte
     long.  However, we can relax a section more than once and in that
     case we must either set fr_subtype back to the unrelaxed state,
     or return the value for the appropriate branch.  */
  return md_relax_table[fragP->fr_subtype].rlx_length;
}
11560
11561 /* Called after relax() is finished.
11562
11563    In:  Address of frag.
11564         fr_type == rs_machine_dependent.
11565         fr_subtype is what the address relaxed to.
11566
11567    Out: Any fixSs and constants are set up.
11568         Caller will turn frag into a ".space 0".  */
11569
11570 void
11571 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
11572                  fragS *fragP)
11573 {
11574   unsigned char *opcode;
11575   unsigned char *where_to_put_displacement = NULL;
11576   offsetT target_address;
11577   offsetT opcode_address;
11578   unsigned int extension = 0;
11579   offsetT displacement_from_opcode_start;
11580
11581   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11582       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
11583       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11584     {
11585       /* Generate nop padding.  */
11586       unsigned int size = fragP->tc_frag_data.length;
11587       if (size)
11588         {
11589           if (size > fragP->tc_frag_data.max_bytes)
11590             abort ();
11591
11592           if (flag_debug)
11593             {
11594               const char *msg;
11595               const char *branch = "branch";
11596               const char *prefix = "";
11597               fragS *padding_fragP;
11598               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11599                   == BRANCH_PREFIX)
11600                 {
11601                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11602                   switch (fragP->tc_frag_data.default_prefix)
11603                     {
11604                     default:
11605                       abort ();
11606                       break;
11607                     case CS_PREFIX_OPCODE:
11608                       prefix = " cs";
11609                       break;
11610                     case DS_PREFIX_OPCODE:
11611                       prefix = " ds";
11612                       break;
11613                     case ES_PREFIX_OPCODE:
11614                       prefix = " es";
11615                       break;
11616                     case FS_PREFIX_OPCODE:
11617                       prefix = " fs";
11618                       break;
11619                     case GS_PREFIX_OPCODE:
11620                       prefix = " gs";
11621                       break;
11622                     case SS_PREFIX_OPCODE:
11623                       prefix = " ss";
11624                       break;
11625                     }
11626                   if (padding_fragP)
11627                     msg = _("%s:%u: add %d%s at 0x%llx to align "
11628                             "%s within %d-byte boundary\n");
11629                   else
11630                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
11631                             "align %s within %d-byte boundary\n");
11632                 }
11633               else
11634                 {
11635                   padding_fragP = fragP;
11636                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
11637                           "%s within %d-byte boundary\n");
11638                 }
11639
11640               if (padding_fragP)
11641                 switch (padding_fragP->tc_frag_data.branch_type)
11642                   {
11643                   case align_branch_jcc:
11644                     branch = "jcc";
11645                     break;
11646                   case align_branch_fused:
11647                     branch = "fused jcc";
11648                     break;
11649                   case align_branch_jmp:
11650                     branch = "jmp";
11651                     break;
11652                   case align_branch_call:
11653                     branch = "call";
11654                     break;
11655                   case align_branch_indirect:
11656                     branch = "indiret branch";
11657                     break;
11658                   case align_branch_ret:
11659                     branch = "ret";
11660                     break;
11661                   default:
11662                     break;
11663                   }
11664
11665               fprintf (stdout, msg,
11666                        fragP->fr_file, fragP->fr_line, size, prefix,
11667                        (long long) fragP->fr_address, branch,
11668                        1 << align_branch_power);
11669             }
11670           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11671             memset (fragP->fr_opcode,
11672                     fragP->tc_frag_data.default_prefix, size);
11673           else
11674             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
11675                                 size, 0);
11676           fragP->fr_fix += size;
11677         }
11678       return;
11679     }
11680
11681   opcode = (unsigned char *) fragP->fr_opcode;
11682
11683   /* Address we want to reach in file space.  */
11684   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
11685
11686   /* Address opcode resides at in file space.  */
11687   opcode_address = fragP->fr_address + fragP->fr_fix;
11688
11689   /* Displacement from opcode start to fill into instruction.  */
11690   displacement_from_opcode_start = target_address - opcode_address;
11691
11692   if ((fragP->fr_subtype & BIG) == 0)
11693     {
11694       /* Don't have to change opcode.  */
11695       extension = 1;            /* 1 opcode + 1 displacement  */
11696       where_to_put_displacement = &opcode[1];
11697     }
11698   else
11699     {
11700       if (no_cond_jump_promotion
11701           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
11702         as_warn_where (fragP->fr_file, fragP->fr_line,
11703                        _("long jump required"));
11704
11705       switch (fragP->fr_subtype)
11706         {
11707         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
11708           extension = 4;                /* 1 opcode + 4 displacement  */
11709           opcode[0] = 0xe9;
11710           where_to_put_displacement = &opcode[1];
11711           break;
11712
11713         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
11714           extension = 2;                /* 1 opcode + 2 displacement  */
11715           opcode[0] = 0xe9;
11716           where_to_put_displacement = &opcode[1];
11717           break;
11718
11719         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
11720         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
11721           extension = 5;                /* 2 opcode + 4 displacement  */
11722           opcode[1] = opcode[0] + 0x10;
11723           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11724           where_to_put_displacement = &opcode[2];
11725           break;
11726
11727         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
11728           extension = 3;                /* 2 opcode + 2 displacement  */
11729           opcode[1] = opcode[0] + 0x10;
11730           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11731           where_to_put_displacement = &opcode[2];
11732           break;
11733
11734         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
11735           extension = 4;
11736           opcode[0] ^= 1;
11737           opcode[1] = 3;
11738           opcode[2] = 0xe9;
11739           where_to_put_displacement = &opcode[3];
11740           break;
11741
11742         default:
11743           BAD_CASE (fragP->fr_subtype);
11744           break;
11745         }
11746     }
11747
11748   /* If size if less then four we are sure that the operand fits,
11749      but if it's 4, then it could be that the displacement is larger
11750      then -/+ 2GB.  */
11751   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
11752       && object_64bit
11753       && ((addressT) (displacement_from_opcode_start - extension
11754                       + ((addressT) 1 << 31))
11755           > (((addressT) 2 << 31) - 1)))
11756     {
11757       as_bad_where (fragP->fr_file, fragP->fr_line,
11758                     _("jump target out of range"));
11759       /* Make us emit 0.  */
11760       displacement_from_opcode_start = extension;
11761     }
11762   /* Now put displacement after opcode.  */
11763   md_number_to_chars ((char *) where_to_put_displacement,
11764                       (valueT) (displacement_from_opcode_start - extension),
11765                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
11766   fragP->fr_fix += extension;
11767 }
11768 \f
/* Apply a fixup (fixP) to segment data, once it has been determined
   by our caller that we have all the info we need to fix it up.

   Parameter valP is the pointer to the value of the bits.

   On the 386, immediates, displacements, and data pointers are all in
   the same (little-endian) format, so we don't need to care about which
   we are handling.

   Unless TE_Mach is defined, the value is first adjusted for
   pc-relative and target specific conventions and stored back through
   valP.  fx_done is set when the fixup has no symbol.  When
   use_rela_relocations is set (or, for PE, when the symbol is weak) the
   value is saved in fx_addnumber and zero is written into the frag
   instead.  For the ELF TLS descriptor call and vtable relocations the
   function returns early, with fx_done cleared and without writing
   anything.  */

void
md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
{
  /* Location inside the frag where the value will be written.  */
  char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
  valueT value = *valP;

#if !defined (TE_Mach)
  /* A pc-relative fixup that still carries an absolute relocation type
     is switched to the pc-relative type of the same width.  */
  if (fixP->fx_pcrel)
    {
      switch (fixP->fx_r_type)
        {
        default:
          break;

        case BFD_RELOC_64:
          fixP->fx_r_type = BFD_RELOC_64_PCREL;
          break;
        case BFD_RELOC_32:
        case BFD_RELOC_X86_64_32S:
          fixP->fx_r_type = BFD_RELOC_32_PCREL;
          break;
        case BFD_RELOC_16:
          fixP->fx_r_type = BFD_RELOC_16_PCREL;
          break;
        case BFD_RELOC_8:
          fixP->fx_r_type = BFD_RELOC_8_PCREL;
          break;
        }
    }

  if (fixP->fx_addsy != NULL
      && (fixP->fx_r_type == BFD_RELOC_32_PCREL
          || fixP->fx_r_type == BFD_RELOC_64_PCREL
          || fixP->fx_r_type == BFD_RELOC_16_PCREL
          || fixP->fx_r_type == BFD_RELOC_8_PCREL)
      && !use_rela_relocations)
    {
      /* This is a hack.  There should be a better way to handle this.
         This covers for the fact that bfd_install_relocation will
         subtract the current location (for partial_inplace, PC relative
         relocations); see more below.  */
#ifndef OBJ_AOUT
      if (IS_ELF
#ifdef TE_PE
          || OUTPUT_FLAVOR == bfd_target_coff_flavour
#endif
          )
        value += fixP->fx_where + fixP->fx_frag->fr_address;
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      if (IS_ELF)
        {
          segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);

          if ((sym_seg == seg
               || (symbol_section_p (fixP->fx_addsy)
                   && sym_seg != absolute_section))
              && !generic_force_reloc (fixP))
            {
              /* Yes, we add the values in twice.  This is because
                 bfd_install_relocation subtracts them out again.  I think
                 bfd_install_relocation is broken, but I don't dare change
                 it.  FIXME.  */
              value += fixP->fx_where + fixP->fx_frag->fr_address;
            }
        }
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      /* For some reason, the PE format does not store a
         section address offset for a PC relative symbol.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != seg
          || S_IS_WEAK (fixP->fx_addsy))
        value += md_pcrel_from (fixP);
#endif
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  if (fixP->fx_addsy != NULL
      && S_IS_WEAK (fixP->fx_addsy)
      /* PR 16858: Do not modify weak function references.  */
      && ! fixP->fx_pcrel)
    {
#if !defined (TE_PEP)
      /* For x86 PE weak function symbols are neither PC-relative
         nor do they set S_IS_FUNCTION.  So the only reliable way
         to detect them is to check the flags of their containing
         section.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
          && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
        ;
      else
#endif
      value -= S_GET_VALUE (fixP->fx_addsy);
    }
#endif

  /* Fix a few things - the dynamic linker expects certain values here,
     and we must not disappoint it.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF && fixP->fx_addsy)
    switch (fixP->fx_r_type)
      {
      case BFD_RELOC_386_PLT32:
      case BFD_RELOC_X86_64_PLT32:
        /* Make the jump instruction point to the address of the operand.
           At runtime we merely add the offset to the actual PLT entry.
           NB: Subtract the offset size only for jump instructions.  */
        if (fixP->fx_pcrel)
          value = -4;
        break;

      case BFD_RELOC_386_TLS_GD:
      case BFD_RELOC_386_TLS_LDM:
      case BFD_RELOC_386_TLS_IE_32:
      case BFD_RELOC_386_TLS_IE:
      case BFD_RELOC_386_TLS_GOTIE:
      case BFD_RELOC_386_TLS_GOTDESC:
      case BFD_RELOC_X86_64_TLSGD:
      case BFD_RELOC_X86_64_TLSLD:
      case BFD_RELOC_X86_64_GOTTPOFF:
      case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
        value = 0; /* Fully resolved at runtime.  No addend.  */
        /* Fallthrough */
      case BFD_RELOC_386_TLS_LE:
      case BFD_RELOC_386_TLS_LDO_32:
      case BFD_RELOC_386_TLS_LE_32:
      case BFD_RELOC_X86_64_DTPOFF32:
      case BFD_RELOC_X86_64_DTPOFF64:
      case BFD_RELOC_X86_64_TPOFF32:
      case BFD_RELOC_X86_64_TPOFF64:
        /* Every TLS relocation marks its symbol as thread-local.  */
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        break;

      case BFD_RELOC_386_TLS_DESC_CALL:
      case BFD_RELOC_X86_64_TLSDESC_CALL:
        /* NOTE(review): this assignment has no visible effect, since
           the function returns before VALUE is stored or written.  */
        value = 0; /* Fully resolved at runtime.  No addend.  */
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        fixP->fx_done = 0;
        return;

      case BFD_RELOC_VTABLE_INHERIT:
      case BFD_RELOC_VTABLE_ENTRY:
        /* Leave the fixup pending and do not touch the frag.  */
        fixP->fx_done = 0;
        return;

      default:
        break;
      }
#endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
  /* Hand the adjusted value back to the caller.  */
  *valP = value;
#endif /* !defined (TE_Mach)  */

  /* Are we finished with this relocation now?  */
  if (fixP->fx_addsy == NULL)
    fixP->fx_done = 1;
#if defined (OBJ_COFF) && defined (TE_PE)
  else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
    {
      fixP->fx_done = 0;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* Clear out the frag for now.  */
      value = 0;
    }
#endif
  else if (use_rela_relocations)
    {
      fixP->fx_no_overflow = 1;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      value = 0;
    }

  /* Store the (possibly zeroed) value in the fx_size bytes at P.  */
  md_number_to_chars (p, value, fixP->fx_size);
}
11952 \f
/* Convert a floating point literal by handing TYPE, LITP and SIZEP
   unchanged to ieee_md_atof and returning its result.  */
const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* This outputs the LITTLENUMs in REVERSE order, as needed for the
     little-endian 386.  The final FALSE argument is presumably
     ieee_md_atof's big-endian flag -- confirm against its declaration.
     NOTE(review): this comment used to call the 386 "bigendian", which
     contradicts the little-endian format this file uses when applying
     fixups.  */
  return ieee_md_atof (type, litP, sizeP, FALSE);
}
11960 \f
/* Scratch buffer shared by every call to output_invalid.  It holds the
   longest text produced there, "(0x" followed by two hex digits and
   ")", plus the terminating NUL.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Format the character C for use in a diagnostic: quoted ('c') when it
   is printable, as a parenthesized hex byte otherwise.  The result
   lives in a static buffer that the next call overwrites.  */

static char *
output_invalid (int c)
{
  if (!ISPRINT (c))
    {
      snprintf (output_invalid_buf, sizeof (output_invalid_buf),
                "(0x%x)", (unsigned char) c);
      return output_invalid_buf;
    }

  snprintf (output_invalid_buf, sizeof (output_invalid_buf),
            "'%c'", c);
  return output_invalid_buf;
}
11974
11975 /* REG_STRING starts *before* REGISTER_PREFIX.  */
11976
11977 static const reg_entry *
11978 parse_real_register (char *reg_string, char **end_op)
11979 {
11980   char *s = reg_string;
11981   char *p;
11982   char reg_name_given[MAX_REG_NAME_SIZE + 1];
11983   const reg_entry *r;
11984
11985   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
11986   if (*s == REGISTER_PREFIX)
11987     ++s;
11988
11989   if (is_space_char (*s))
11990     ++s;
11991
11992   p = reg_name_given;
11993   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
11994     {
11995       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
11996         return (const reg_entry *) NULL;
11997       s++;
11998     }
11999
12000   /* For naked regs, make sure that we are not dealing with an identifier.
12001      This prevents confusing an identifier like `eax_var' with register
12002      `eax'.  */
12003   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12004     return (const reg_entry *) NULL;
12005
12006   *end_op = s;
12007
12008   r = (const reg_entry *) hash_find (reg_hash, reg_name_given);
12009
12010   /* Handle floating point regs, allowing spaces in the (i) part.  */
12011   if (r == i386_regtab /* %st is first entry of table  */)
12012     {
12013       if (!cpu_arch_flags.bitfield.cpu8087
12014           && !cpu_arch_flags.bitfield.cpu287
12015           && !cpu_arch_flags.bitfield.cpu387)
12016         return (const reg_entry *) NULL;
12017
12018       if (is_space_char (*s))
12019         ++s;
12020       if (*s == '(')
12021         {
12022           ++s;
12023           if (is_space_char (*s))
12024             ++s;
12025           if (*s >= '0' && *s <= '7')
12026             {
12027               int fpr = *s - '0';
12028               ++s;
12029               if (is_space_char (*s))
12030                 ++s;
12031               if (*s == ')')
12032                 {
12033                   *end_op = s + 1;
12034                   r = (const reg_entry *) hash_find (reg_hash, "st(0)");
12035                   know (r);
12036                   return r + fpr;
12037                 }
12038             }
12039           /* We have "%st(" then garbage.  */
12040           return (const reg_entry *) NULL;
12041         }
12042     }
12043
12044   if (r == NULL || allow_pseudo_reg)
12045     return r;
12046
12047   if (operand_type_all_zero (&r->reg_type))
12048     return (const reg_entry *) NULL;
12049
12050   if ((r->reg_type.bitfield.dword
12051        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12052        || r->reg_type.bitfield.class == RegCR
12053        || r->reg_type.bitfield.class == RegDR
12054        || r->reg_type.bitfield.class == RegTR)
12055       && !cpu_arch_flags.bitfield.cpui386)
12056     return (const reg_entry *) NULL;
12057
12058   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12059     return (const reg_entry *) NULL;
12060
12061   if (!cpu_arch_flags.bitfield.cpuavx512f)
12062     {
12063       if (r->reg_type.bitfield.zmmword
12064           || r->reg_type.bitfield.class == RegMask)
12065         return (const reg_entry *) NULL;
12066
12067       if (!cpu_arch_flags.bitfield.cpuavx)
12068         {
12069           if (r->reg_type.bitfield.ymmword)
12070             return (const reg_entry *) NULL;
12071
12072           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12073             return (const reg_entry *) NULL;
12074         }
12075     }
12076
12077   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12078     return (const reg_entry *) NULL;
12079
12080   /* Don't allow fake index register unless allow_index_reg isn't 0. */
12081   if (!allow_index_reg && r->reg_num == RegIZ)
12082     return (const reg_entry *) NULL;
12083
12084   /* Upper 16 vector registers are only available with VREX in 64bit
12085      mode, and require EVEX encoding.  */
12086   if (r->reg_flags & RegVRex)
12087     {
12088       if (!cpu_arch_flags.bitfield.cpuavx512f
12089           || flag_code != CODE_64BIT)
12090         return (const reg_entry *) NULL;
12091
12092       i.vec_encoding = vex_encoding_evex;
12093     }
12094
12095   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12096       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12097       && flag_code != CODE_64BIT)
12098     return (const reg_entry *) NULL;
12099
12100   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12101       && !intel_syntax)
12102     return (const reg_entry *) NULL;
12103
12104   return r;
12105 }
12106
12107 /* REG_STRING starts *before* REGISTER_PREFIX.  */
12108
12109 static const reg_entry *
12110 parse_register (char *reg_string, char **end_op)
12111 {
12112   const reg_entry *r;
12113
12114   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
12115     r = parse_real_register (reg_string, end_op);
12116   else
12117     r = NULL;
12118   if (!r)
12119     {
12120       char *save = input_line_pointer;
12121       char c;
12122       symbolS *symbolP;
12123
12124       input_line_pointer = reg_string;
12125       c = get_symbol_name (&reg_string);
12126       symbolP = symbol_find (reg_string);
12127       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
12128         {
12129           const expressionS *e = symbol_get_value_expression (symbolP);
12130
12131           know (e->X_op == O_register);
12132           know (e->X_add_number >= 0
12133                 && (valueT) e->X_add_number < i386_regtab_size);
12134           r = i386_regtab + e->X_add_number;
12135           if ((r->reg_flags & RegVRex))
12136             i.vec_encoding = vex_encoding_evex;
12137           *end_op = input_line_pointer;
12138         }
12139       *input_line_pointer = c;
12140       input_line_pointer = save;
12141     }
12142   return r;
12143 }
12144
12145 int
12146 i386_parse_name (char *name, expressionS *e, char *nextcharP)
12147 {
12148   const reg_entry *r;
12149   char *end = input_line_pointer;
12150
12151   *end = *nextcharP;
12152   r = parse_register (name, &input_line_pointer);
12153   if (r && end <= input_line_pointer)
12154     {
12155       *nextcharP = *input_line_pointer;
12156       *input_line_pointer = 0;
12157       e->X_op = O_register;
12158       e->X_add_number = r - i386_regtab;
12159       return 1;
12160     }
12161   input_line_pointer = end;
12162   *end = 0;
12163   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
12164 }
12165
12166 void
12167 md_operand (expressionS *e)
12168 {
12169   char *end;
12170   const reg_entry *r;
12171
12172   switch (*input_line_pointer)
12173     {
12174     case REGISTER_PREFIX:
12175       r = parse_real_register (input_line_pointer, &end);
12176       if (r)
12177         {
12178           e->X_op = O_register;
12179           e->X_add_number = r - i386_regtab;
12180           input_line_pointer = end;
12181         }
12182       break;
12183
12184     case '[':
12185       gas_assert (intel_syntax);
12186       end = input_line_pointer++;
12187       expression (e);
12188       if (*input_line_pointer == ']')
12189         {
12190           ++input_line_pointer;
12191           e->X_op_symbol = make_expr_symbol (e);
12192           e->X_add_symbol = NULL;
12193           e->X_add_number = 0;
12194           e->X_op = O_index;
12195         }
12196       else
12197         {
12198           e->X_op = O_absent;
12199           input_line_pointer = end;
12200         }
12201       break;
12202     }
12203 }
12204
12205 \f
/* Short option letters in getopt notation: a trailing `:' marks a
   required argument and `::' an optional one.  ELF-capable builds
   additionally accept -k, -V, -Q<arg> and -s; every build accepts -q,
   -n and -O[arg].  The letters are handled in md_parse_option.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
12211
/* Codes for the long options listed in md_longopts, numbered
   consecutively from OPTION_MD_BASE.  md_parse_option switches on
   these values.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
12243
/* Long options understood by this target, each mapped to one of the
   OPTION_* codes above.  The table ends with an all-NULL/zero entry.
   Some entries are compiled in only for certain configurations:
   --64 needs ELF, PE/PEP or Mach-O; --x32, --mshared and
   --mx86-used-note need ELF; --mbig-obj needs PE/PEP.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
/* Size of md_longopts in bytes (not the number of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
12287
12288 int
12289 md_parse_option (int c, const char *arg)
12290 {
12291   unsigned int j;
12292   char *arch, *next, *saved, *type;
12293
12294   switch (c)
12295     {
12296     case 'n':
12297       optimize_align_code = 0;
12298       break;
12299
12300     case 'q':
12301       quiet_warnings = 1;
12302       break;
12303
12304 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12305       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
12306          should be emitted or not.  FIXME: Not implemented.  */
12307     case 'Q':
12308       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
12309         return 0;
12310       break;
12311
12312       /* -V: SVR4 argument to print version ID.  */
12313     case 'V':
12314       print_version_id ();
12315       break;
12316
12317       /* -k: Ignore for FreeBSD compatibility.  */
12318     case 'k':
12319       break;
12320
12321     case 's':
12322       /* -s: On i386 Solaris, this tells the native assembler to use
12323          .stab instead of .stab.excl.  We always use .stab anyhow.  */
12324       break;
12325
12326     case OPTION_MSHARED:
12327       shared = 1;
12328       break;
12329
12330     case OPTION_X86_USED_NOTE:
12331       if (strcasecmp (arg, "yes") == 0)
12332         x86_used_note = 1;
12333       else if (strcasecmp (arg, "no") == 0)
12334         x86_used_note = 0;
12335       else
12336         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
12337       break;
12338
12339
12340 #endif
12341 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12342      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12343     case OPTION_64:
12344       {
12345         const char **list, **l;
12346
12347         list = bfd_target_list ();
12348         for (l = list; *l != NULL; l++)
12349           if (CONST_STRNEQ (*l, "elf64-x86-64")
12350               || strcmp (*l, "coff-x86-64") == 0
12351               || strcmp (*l, "pe-x86-64") == 0
12352               || strcmp (*l, "pei-x86-64") == 0
12353               || strcmp (*l, "mach-o-x86-64") == 0)
12354             {
12355               default_arch = "x86_64";
12356               break;
12357             }
12358         if (*l == NULL)
12359           as_fatal (_("no compiled in support for x86_64"));
12360         free (list);
12361       }
12362       break;
12363 #endif
12364
12365 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12366     case OPTION_X32:
12367       if (IS_ELF)
12368         {
12369           const char **list, **l;
12370
12371           list = bfd_target_list ();
12372           for (l = list; *l != NULL; l++)
12373             if (CONST_STRNEQ (*l, "elf32-x86-64"))
12374               {
12375                 default_arch = "x86_64:32";
12376                 break;
12377               }
12378           if (*l == NULL)
12379             as_fatal (_("no compiled in support for 32bit x86_64"));
12380           free (list);
12381         }
12382       else
12383         as_fatal (_("32bit x86_64 is only supported for ELF"));
12384       break;
12385 #endif
12386
12387     case OPTION_32:
12388       default_arch = "i386";
12389       break;
12390
12391     case OPTION_DIVIDE:
12392 #ifdef SVR4_COMMENT_CHARS
12393       {
12394         char *n, *t;
12395         const char *s;
12396
12397         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
12398         t = n;
12399         for (s = i386_comment_chars; *s != '\0'; s++)
12400           if (*s != '/')
12401             *t++ = *s;
12402         *t = '\0';
12403         i386_comment_chars = n;
12404       }
12405 #endif
12406       break;
12407
12408     case OPTION_MARCH:
12409       saved = xstrdup (arg);
12410       arch = saved;
12411       /* Allow -march=+nosse.  */
12412       if (*arch == '+')
12413         arch++;
12414       do
12415         {
12416           if (*arch == '.')
12417             as_fatal (_("invalid -march= option: `%s'"), arg);
12418           next = strchr (arch, '+');
12419           if (next)
12420             *next++ = '\0';
12421           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12422             {
12423               if (strcmp (arch, cpu_arch [j].name) == 0)
12424                 {
12425                   /* Processor.  */
12426                   if (! cpu_arch[j].flags.bitfield.cpui386)
12427                     continue;
12428
12429                   cpu_arch_name = cpu_arch[j].name;
12430                   cpu_sub_arch_name = NULL;
12431                   cpu_arch_flags = cpu_arch[j].flags;
12432                   cpu_arch_isa = cpu_arch[j].type;
12433                   cpu_arch_isa_flags = cpu_arch[j].flags;
12434                   if (!cpu_arch_tune_set)
12435                     {
12436                       cpu_arch_tune = cpu_arch_isa;
12437                       cpu_arch_tune_flags = cpu_arch_isa_flags;
12438                     }
12439                   break;
12440                 }
12441               else if (*cpu_arch [j].name == '.'
12442                        && strcmp (arch, cpu_arch [j].name + 1) == 0)
12443                 {
12444                   /* ISA extension.  */
12445                   i386_cpu_flags flags;
12446
12447                   flags = cpu_flags_or (cpu_arch_flags,
12448                                         cpu_arch[j].flags);
12449
12450                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12451                     {
12452                       if (cpu_sub_arch_name)
12453                         {
12454                           char *name = cpu_sub_arch_name;
12455                           cpu_sub_arch_name = concat (name,
12456                                                       cpu_arch[j].name,
12457                                                       (const char *) NULL);
12458                           free (name);
12459                         }
12460                       else
12461                         cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
12462                       cpu_arch_flags = flags;
12463                       cpu_arch_isa_flags = flags;
12464                     }
12465                   else
12466                     cpu_arch_isa_flags
12467                       = cpu_flags_or (cpu_arch_isa_flags,
12468                                       cpu_arch[j].flags);
12469                   break;
12470                 }
12471             }
12472
12473           if (j >= ARRAY_SIZE (cpu_arch))
12474             {
12475               /* Disable an ISA extension.  */
12476               for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12477                 if (strcmp (arch, cpu_noarch [j].name) == 0)
12478                   {
12479                     i386_cpu_flags flags;
12480
12481                     flags = cpu_flags_and_not (cpu_arch_flags,
12482                                                cpu_noarch[j].flags);
12483                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12484                       {
12485                         if (cpu_sub_arch_name)
12486                           {
12487                             char *name = cpu_sub_arch_name;
12488                             cpu_sub_arch_name = concat (arch,
12489                                                         (const char *) NULL);
12490                             free (name);
12491                           }
12492                         else
12493                           cpu_sub_arch_name = xstrdup (arch);
12494                         cpu_arch_flags = flags;
12495                         cpu_arch_isa_flags = flags;
12496                       }
12497                     break;
12498                   }
12499
12500               if (j >= ARRAY_SIZE (cpu_noarch))
12501                 j = ARRAY_SIZE (cpu_arch);
12502             }
12503
12504           if (j >= ARRAY_SIZE (cpu_arch))
12505             as_fatal (_("invalid -march= option: `%s'"), arg);
12506
12507           arch = next;
12508         }
12509       while (next != NULL);
12510       free (saved);
12511       break;
12512
12513     case OPTION_MTUNE:
12514       if (*arg == '.')
12515         as_fatal (_("invalid -mtune= option: `%s'"), arg);
12516       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12517         {
12518           if (strcmp (arg, cpu_arch [j].name) == 0)
12519             {
12520               cpu_arch_tune_set = 1;
12521               cpu_arch_tune = cpu_arch [j].type;
12522               cpu_arch_tune_flags = cpu_arch[j].flags;
12523               break;
12524             }
12525         }
12526       if (j >= ARRAY_SIZE (cpu_arch))
12527         as_fatal (_("invalid -mtune= option: `%s'"), arg);
12528       break;
12529
12530     case OPTION_MMNEMONIC:
12531       if (strcasecmp (arg, "att") == 0)
12532         intel_mnemonic = 0;
12533       else if (strcasecmp (arg, "intel") == 0)
12534         intel_mnemonic = 1;
12535       else
12536         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
12537       break;
12538
12539     case OPTION_MSYNTAX:
12540       if (strcasecmp (arg, "att") == 0)
12541         intel_syntax = 0;
12542       else if (strcasecmp (arg, "intel") == 0)
12543         intel_syntax = 1;
12544       else
12545         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
12546       break;
12547
12548     case OPTION_MINDEX_REG:
12549       allow_index_reg = 1;
12550       break;
12551
12552     case OPTION_MNAKED_REG:
12553       allow_naked_reg = 1;
12554       break;
12555
12556     case OPTION_MSSE2AVX:
12557       sse2avx = 1;
12558       break;
12559
12560     case OPTION_MSSE_CHECK:
12561       if (strcasecmp (arg, "error") == 0)
12562         sse_check = check_error;
12563       else if (strcasecmp (arg, "warning") == 0)
12564         sse_check = check_warning;
12565       else if (strcasecmp (arg, "none") == 0)
12566         sse_check = check_none;
12567       else
12568         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
12569       break;
12570
12571     case OPTION_MOPERAND_CHECK:
12572       if (strcasecmp (arg, "error") == 0)
12573         operand_check = check_error;
12574       else if (strcasecmp (arg, "warning") == 0)
12575         operand_check = check_warning;
12576       else if (strcasecmp (arg, "none") == 0)
12577         operand_check = check_none;
12578       else
12579         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
12580       break;
12581
12582     case OPTION_MAVXSCALAR:
12583       if (strcasecmp (arg, "128") == 0)
12584         avxscalar = vex128;
12585       else if (strcasecmp (arg, "256") == 0)
12586         avxscalar = vex256;
12587       else
12588         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
12589       break;
12590
12591     case OPTION_MVEXWIG:
12592       if (strcmp (arg, "0") == 0)
12593         vexwig = vexw0;
12594       else if (strcmp (arg, "1") == 0)
12595         vexwig = vexw1;
12596       else
12597         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
12598       break;
12599
12600     case OPTION_MADD_BND_PREFIX:
12601       add_bnd_prefix = 1;
12602       break;
12603
12604     case OPTION_MEVEXLIG:
12605       if (strcmp (arg, "128") == 0)
12606         evexlig = evexl128;
12607       else if (strcmp (arg, "256") == 0)
12608         evexlig = evexl256;
12609       else  if (strcmp (arg, "512") == 0)
12610         evexlig = evexl512;
12611       else
12612         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
12613       break;
12614
12615     case OPTION_MEVEXRCIG:
12616       if (strcmp (arg, "rne") == 0)
12617         evexrcig = rne;
12618       else if (strcmp (arg, "rd") == 0)
12619         evexrcig = rd;
12620       else if (strcmp (arg, "ru") == 0)
12621         evexrcig = ru;
12622       else if (strcmp (arg, "rz") == 0)
12623         evexrcig = rz;
12624       else
12625         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
12626       break;
12627
12628     case OPTION_MEVEXWIG:
12629       if (strcmp (arg, "0") == 0)
12630         evexwig = evexw0;
12631       else if (strcmp (arg, "1") == 0)
12632         evexwig = evexw1;
12633       else
12634         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
12635       break;
12636
12637 # if defined (TE_PE) || defined (TE_PEP)
12638     case OPTION_MBIG_OBJ:
12639       use_big_obj = 1;
12640       break;
12641 #endif
12642
12643     case OPTION_MOMIT_LOCK_PREFIX:
12644       if (strcasecmp (arg, "yes") == 0)
12645         omit_lock_prefix = 1;
12646       else if (strcasecmp (arg, "no") == 0)
12647         omit_lock_prefix = 0;
12648       else
12649         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
12650       break;
12651
12652     case OPTION_MFENCE_AS_LOCK_ADD:
12653       if (strcasecmp (arg, "yes") == 0)
12654         avoid_fence = 1;
12655       else if (strcasecmp (arg, "no") == 0)
12656         avoid_fence = 0;
12657       else
12658         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
12659       break;
12660
12661     case OPTION_MRELAX_RELOCATIONS:
12662       if (strcasecmp (arg, "yes") == 0)
12663         generate_relax_relocations = 1;
12664       else if (strcasecmp (arg, "no") == 0)
12665         generate_relax_relocations = 0;
12666       else
12667         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
12668       break;
12669
12670     case OPTION_MALIGN_BRANCH_BOUNDARY:
12671       {
12672         char *end;
12673         long int align = strtoul (arg, &end, 0);
12674         if (*end == '\0')
12675           {
12676             if (align == 0)
12677               {
12678                 align_branch_power = 0;
12679                 break;
12680               }
12681             else if (align >= 16)
12682               {
12683                 int align_power;
12684                 for (align_power = 0;
12685                      (align & 1) == 0;
12686                      align >>= 1, align_power++)
12687                   continue;
12688                 /* Limit alignment power to 31.  */
12689                 if (align == 1 && align_power < 32)
12690                   {
12691                     align_branch_power = align_power;
12692                     break;
12693                   }
12694               }
12695           }
12696         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
12697       }
12698       break;
12699
12700     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
12701       {
12702         char *end;
12703         int align = strtoul (arg, &end, 0);
12704         /* Some processors only support 5 prefixes.  */
12705         if (*end == '\0' && align >= 0 && align < 6)
12706           {
12707             align_branch_prefix_size = align;
12708             break;
12709           }
12710         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
12711                   arg);
12712       }
12713       break;
12714
12715     case OPTION_MALIGN_BRANCH:
12716       align_branch = 0;
12717       saved = xstrdup (arg);
12718       type = saved;
12719       do
12720         {
12721           next = strchr (type, '+');
12722           if (next)
12723             *next++ = '\0';
12724           if (strcasecmp (type, "jcc") == 0)
12725             align_branch |= align_branch_jcc_bit;
12726           else if (strcasecmp (type, "fused") == 0)
12727             align_branch |= align_branch_fused_bit;
12728           else if (strcasecmp (type, "jmp") == 0)
12729             align_branch |= align_branch_jmp_bit;
12730           else if (strcasecmp (type, "call") == 0)
12731             align_branch |= align_branch_call_bit;
12732           else if (strcasecmp (type, "ret") == 0)
12733             align_branch |= align_branch_ret_bit;
12734           else if (strcasecmp (type, "indirect") == 0)
12735             align_branch |= align_branch_indirect_bit;
12736           else
12737             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
12738           type = next;
12739         }
12740       while (next != NULL);
12741       free (saved);
12742       break;
12743
12744     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
12745       align_branch_power = 5;
12746       align_branch_prefix_size = 5;
12747       align_branch = (align_branch_jcc_bit
12748                       | align_branch_fused_bit
12749                       | align_branch_jmp_bit);
12750       break;
12751
12752     case OPTION_MAMD64:
12753       isa64 = amd64;
12754       break;
12755
12756     case OPTION_MINTEL64:
12757       isa64 = intel64;
12758       break;
12759
12760     case 'O':
12761       if (arg == NULL)
12762         {
12763           optimize = 1;
12764           /* Turn off -Os.  */
12765           optimize_for_space = 0;
12766         }
12767       else if (*arg == 's')
12768         {
12769           optimize_for_space = 1;
12770           /* Turn on all encoding optimizations.  */
12771           optimize = INT_MAX;
12772         }
12773       else
12774         {
12775           optimize = atoi (arg);
12776           /* Turn off -Os.  */
12777           optimize_for_space = 0;
12778         }
12779       break;
12780
12781     default:
12782       return 0;
12783     }
12784   return 1;
12785 }
12786
/* One blank output line; its size bounds the width of the lists
   printed by show_arch.  */
#define MESSAGE_TEMPLATE \
"                                                                                "

/* Append NAME (LEN characters) to the comma-separated list being
   assembled in MESSAGE.

   P is the current write position and START is where list text begins
   on each line.  *LEFT_P holds the room remaining on the current line
   and is updated.  When NAME no longer fits, the line built so far is
   printed to STREAM with a trailing comma and a fresh line is begun
   at START.  Returns the new write position; the text is not
   NUL-terminated here.  */

static char *
output_message (FILE *stream, char *p, char *message, char *start,
                int *left_p, const char *name, int len)
{
  const int capacity = sizeof (MESSAGE_TEMPLATE);
  /* Room left once NAME plus two characters for ", " or ",\0" have
     been set aside.  */
  int remaining = *left_p - (len + 2);

  if (remaining < 0)
    {
      /* No room: emit what we have and restart at START.  */
      *p++ = ',';
      *p = '\0';
      fprintf (stream, "%s\n", message);
      p = start;
      remaining = capacity - (start - message) - len - 2;

      gas_assert (remaining >= 0);
    }
  else if (p != start)
    {
      /* Separate NAME from the previous entry on this line.  */
      *p++ = ',';
      *p++ = ' ';
    }

  p = mempcpy (p, name, len);
  *left_p = remaining;
  return p;
}
12827
12828 static void
12829 show_arch (FILE *stream, int ext, int check)
12830 {
12831   static char message[] = MESSAGE_TEMPLATE;
12832   char *start = message + 27;
12833   char *p;
12834   int size = sizeof (MESSAGE_TEMPLATE);
12835   int left;
12836   const char *name;
12837   int len;
12838   unsigned int j;
12839
12840   p = start;
12841   left = size - (start - message);
12842   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12843     {
12844       /* Should it be skipped?  */
12845       if (cpu_arch [j].skip)
12846         continue;
12847
12848       name = cpu_arch [j].name;
12849       len = cpu_arch [j].len;
12850       if (*name == '.')
12851         {
12852           /* It is an extension.  Skip if we aren't asked to show it.  */
12853           if (ext)
12854             {
12855               name++;
12856               len--;
12857             }
12858           else
12859             continue;
12860         }
12861       else if (ext)
12862         {
12863           /* It is an processor.  Skip if we show only extension.  */
12864           continue;
12865         }
12866       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
12867         {
12868           /* It is an impossible processor - skip.  */
12869           continue;
12870         }
12871
12872       p = output_message (stream, p, message, start, &left, name, len);
12873     }
12874
12875   /* Display disabled extensions.  */
12876   if (ext)
12877     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12878       {
12879         name = cpu_noarch [j].name;
12880         len = cpu_noarch [j].len;
12881         p = output_message (stream, p, message, start, &left, name,
12882                             len);
12883       }
12884
12885   *p = '\0';
12886   fprintf (stream, "%s\n", message);
12887 }
12888
/* Print the summary of the x86-specific command-line options to STREAM.

   Several entries are only printed for particular object formats or
   target environments (see the preprocessor conditionals), and the
   "(default: ...)" text of a few options is chosen from SYSV386_COMPAT,
   DEFAULT_X86_USED_NOTE and DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */

void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      Do not optimize code alignment\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
                      || defined (TE_PE) || defined (TE_PEP))
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  /* Processors only (ext == 0); with check != 0 the entries whose flags
     lack cpui386 are left out.  */
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of:\n"));
  /* Extensions only (ext != 0), followed by the cpu_noarch entries.  */
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  /* Processors only, this time without the cpui386 filter.  */
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                           for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                           for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                           for SAE-only ignored instructions\n"));
  /* The option line is printed in pieces so that the advertised default
     can depend on SYSV386_COMPAT.  */
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  /* Same split-line scheme; the default shown follows
     DEFAULT_X86_USED_NOTE.  */
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  /* Same split-line scheme; the default shown follows
     DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}
13022
13023 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
13024      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13025      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13026
13027 /* Pick the target format to use.  */
13028
13029 const char *
13030 i386_target_format (void)
13031 {
13032   if (!strncmp (default_arch, "x86_64", 6))
13033     {
13034       update_code_flag (CODE_64BIT, 1);
13035       if (default_arch[6] == '\0')
13036         x86_elf_abi = X86_64_ABI;
13037       else
13038         x86_elf_abi = X86_64_X32_ABI;
13039     }
13040   else if (!strcmp (default_arch, "i386"))
13041     update_code_flag (CODE_32BIT, 1);
13042   else if (!strcmp (default_arch, "iamcu"))
13043     {
13044       update_code_flag (CODE_32BIT, 1);
13045       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
13046         {
13047           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
13048           cpu_arch_name = "iamcu";
13049           cpu_sub_arch_name = NULL;
13050           cpu_arch_flags = iamcu_flags;
13051           cpu_arch_isa = PROCESSOR_IAMCU;
13052           cpu_arch_isa_flags = iamcu_flags;
13053           if (!cpu_arch_tune_set)
13054             {
13055               cpu_arch_tune = cpu_arch_isa;
13056               cpu_arch_tune_flags = cpu_arch_isa_flags;
13057             }
13058         }
13059       else if (cpu_arch_isa != PROCESSOR_IAMCU)
13060         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
13061                   cpu_arch_name);
13062     }
13063   else
13064     as_fatal (_("unknown architecture"));
13065
13066   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
13067     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13068   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
13069     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
13070
13071   switch (OUTPUT_FLAVOR)
13072     {
13073 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
13074     case bfd_target_aout_flavour:
13075       return AOUT_TARGET_FORMAT;
13076 #endif
13077 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
13078 # if defined (TE_PE) || defined (TE_PEP)
13079     case bfd_target_coff_flavour:
13080       if (flag_code == CODE_64BIT)
13081         return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
13082       else
13083         return "pe-i386";
13084 # elif defined (TE_GO32)
13085     case bfd_target_coff_flavour:
13086       return "coff-go32";
13087 # else
13088     case bfd_target_coff_flavour:
13089       return "coff-i386";
13090 # endif
13091 #endif
13092 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13093     case bfd_target_elf_flavour:
13094       {
13095         const char *format;
13096
13097         switch (x86_elf_abi)
13098           {
13099           default:
13100             format = ELF_TARGET_FORMAT;
13101 #ifndef TE_SOLARIS
13102             tls_get_addr = "___tls_get_addr";
13103 #endif
13104             break;
13105           case X86_64_ABI:
13106             use_rela_relocations = 1;
13107             object_64bit = 1;
13108 #ifndef TE_SOLARIS
13109             tls_get_addr = "__tls_get_addr";
13110 #endif
13111             format = ELF_TARGET_FORMAT64;
13112             break;
13113           case X86_64_X32_ABI:
13114             use_rela_relocations = 1;
13115             object_64bit = 1;
13116 #ifndef TE_SOLARIS
13117             tls_get_addr = "__tls_get_addr";
13118 #endif
13119             disallow_64bit_reloc = 1;
13120             format = ELF_TARGET_FORMAT32;
13121             break;
13122           }
13123         if (cpu_arch_isa == PROCESSOR_L1OM)
13124           {
13125             if (x86_elf_abi != X86_64_ABI)
13126               as_fatal (_("Intel L1OM is 64bit only"));
13127             return ELF_TARGET_L1OM_FORMAT;
13128           }
13129         else if (cpu_arch_isa == PROCESSOR_K1OM)
13130           {
13131             if (x86_elf_abi != X86_64_ABI)
13132               as_fatal (_("Intel K1OM is 64bit only"));
13133             return ELF_TARGET_K1OM_FORMAT;
13134           }
13135         else if (cpu_arch_isa == PROCESSOR_IAMCU)
13136           {
13137             if (x86_elf_abi != I386_ABI)
13138               as_fatal (_("Intel MCU is 32bit only"));
13139             return ELF_TARGET_IAMCU_FORMAT;
13140           }
13141         else
13142           return format;
13143       }
13144 #endif
13145 #if defined (OBJ_MACH_O)
13146     case bfd_target_mach_o_flavour:
13147       if (flag_code == CODE_64BIT)
13148         {
13149           use_rela_relocations = 1;
13150           object_64bit = 1;
13151           return "mach-o-x86-64";
13152         }
13153       else
13154         return "mach-o-i386";
13155 #endif
13156     default:
13157       abort ();
13158       return NULL;
13159     }
13160 }
13161
13162 #endif /* OBJ_MAYBE_ more than one  */
13163 \f
13164 symbolS *
13165 md_undefined_symbol (char *name)
13166 {
13167   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
13168       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
13169       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
13170       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
13171     {
13172       if (!GOT_symbol)
13173         {
13174           if (symbol_find (name))
13175             as_bad (_("GOT already in symbol table"));
13176           GOT_symbol = symbol_new (name, undefined_section,
13177                                    (valueT) 0, &zero_address_frag);
13178         };
13179       return GOT_symbol;
13180     }
13181   return 0;
13182 }
13183
13184 /* Round up a section size to the appropriate boundary.  */
13185
13186 valueT
13187 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
13188 {
13189 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
13190   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
13191     {
13192       /* For a.out, force the section size to be aligned.  If we don't do
13193          this, BFD will align it for us, but it will not write out the
13194          final bytes of the section.  This may be a bug in BFD, but it is
13195          easier to fix it here since that is how the other a.out targets
13196          work.  */
13197       int align;
13198
13199       align = bfd_section_alignment (segment);
13200       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
13201     }
13202 #endif
13203
13204   return size;
13205 }
13206
13207 /* On the i386, PC-relative offsets are relative to the start of the
13208    next instruction.  That is, the address of the offset, plus its
13209    size, since the offset is always the last part of the insn.  */
13210
13211 long
13212 md_pcrel_from (fixS *fixP)
13213 {
13214   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
13215 }
13216
13217 #ifndef I386COFF
13218
13219 static void
13220 s_bss (int ignore ATTRIBUTE_UNUSED)
13221 {
13222   int temp;
13223
13224 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13225   if (IS_ELF)
13226     obj_elf_section_change_hook ();
13227 #endif
13228   temp = get_absolute_expression ();
13229   subseg_set (bss_section, (subsegT) temp);
13230   demand_empty_rest_of_line ();
13231 }
13232
13233 #endif
13234
13235 /* Remember constant directive.  */
13236
13237 void
13238 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
13239 {
13240   if (last_insn.kind != last_insn_directive
13241       && (bfd_section_flags (now_seg) & SEC_CODE))
13242     {
13243       last_insn.seg = now_seg;
13244       last_insn.kind = last_insn_directive;
13245       last_insn.name = "constant directive";
13246       last_insn.file = as_where (&last_insn.line);
13247     }
13248 }
13249
13250 void
13251 i386_validate_fix (fixS *fixp)
13252 {
13253   if (fixp->fx_subsy)
13254     {
13255       if (fixp->fx_subsy == GOT_symbol)
13256         {
13257           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
13258             {
13259               if (!object_64bit)
13260                 abort ();
13261 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13262               if (fixp->fx_tcbit2)
13263                 fixp->fx_r_type = (fixp->fx_tcbit
13264                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
13265                                    : BFD_RELOC_X86_64_GOTPCRELX);
13266               else
13267 #endif
13268                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
13269             }
13270           else
13271             {
13272               if (!object_64bit)
13273                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
13274               else
13275                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
13276             }
13277           fixp->fx_subsy = 0;
13278         }
13279     }
13280 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13281   else if (!object_64bit)
13282     {
13283       if (fixp->fx_r_type == BFD_RELOC_386_GOT32
13284           && fixp->fx_tcbit2)
13285         fixp->fx_r_type = BFD_RELOC_386_GOT32X;
13286     }
13287 #endif
13288 }
13289
/* Build the BFD relocation entry (arelent) that describes FIXP.

   The relocation code is derived from fixp->fx_r_type.  The types named
   explicitly in the switch below are used as they are; anything else is
   mapped to a generic absolute or pc-relative code of 1, 2, 4 or (with
   BFD64) 8 bytes, selected by fx_pcrel and fx_size.  An unsupported size
   is reported and treated as a 32-bit relocation.  Afterwards a
   BFD_RELOC_32, BFD_RELOC_32_PCREL, BFD_RELOC_X86_64_32S, BFD_RELOC_64
   or BFD_RELOC_64_PCREL code whose symbol is GOT_symbol is replaced by
   the corresponding GOTPC code.

   Returns the arelent allocated here, or NULL when a size relocation
   against a defined, non-external symbol has been resolved on the spot
   through md_apply_fix.

   Note that SECTION carries ATTRIBUTE_UNUSED but is read (section->vma)
   when computing the default pc-relative addend.  */

arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      if (S_IS_DEFINED (fixp->fx_addsy)
          && !S_IS_EXTERNAL (fixp->fx_addsy))
        {
          /* Resolve size relocation against local symbol to size of
             the symbol plus addend.  */
          valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
          if (fixp->fx_r_type == BFD_RELOC_SIZE32
              && !fits_in_unsigned_long (value))
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("symbol size computation overflow"));
          /* Drop both symbols and apply the computed value directly; no
             relocation entry is produced for this fix.  */
          fixp->fx_addsy = NULL;
          fixp->fx_subsy = NULL;
          md_apply_fix (fixp, (valueT *) &value, NULL);
          return NULL;
        }
#endif
      /* Fall through.  */

    /* These relocation types are passed through unchanged.  */
    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
        {
          /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
          code = fixp->fx_r_type;
          break;
        }
      /* Fall through.  */
    default:
      /* Pick a generic relocation code from the field size.  */
      if (fixp->fx_pcrel)
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte pc-relative relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32_PCREL;
              break;
            case 1: code = BFD_RELOC_8_PCREL;  break;
            case 2: code = BFD_RELOC_16_PCREL; break;
            case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64_PCREL; break;
#endif
            }
        }
      else
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32;
              break;
            case 1: code = BFD_RELOC_8;  break;
            case 2: code = BFD_RELOC_16; break;
            case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64; break;
#endif
            }
        }
      break;
    }

  /* A 32-bit reference to GOT_symbol itself becomes a GOTPC
     relocation; which one depends on object_64bit.  */
  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
        code = BFD_RELOC_386_GOTPC;
      else
        code = BFD_RELOC_X86_64_GOTPC32;
    }
  /* Likewise for a 64-bit reference to GOT_symbol.  */
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  /* Frag address plus the fix's position within the frag.  */
  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
         vtable entry to be used in the relocation's section offset.  */
      /* NOTE(review): when the OBJ_COFF/TE_PE branch below is compiled
         in, the BFD_RELOC_VTABLE_ENTRY path assigns no addend here --
         confirm that this is intended.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
        rel->address = fixp->fx_offset;
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
        rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
      rel->addend = 0;
    }
  /* Use the rela in 64bit mode.  */
  else
    {
      /* With disallow_64bit_reloc set, the 64-bit relocation types
         listed here are reported as not representable in x32 mode.
         Processing continues after the diagnostic.  */
      if (disallow_64bit_reloc)
        switch (code)
          {
          case BFD_RELOC_X86_64_DTPOFF64:
          case BFD_RELOC_X86_64_TPOFF64:
          case BFD_RELOC_64_PCREL:
          case BFD_RELOC_X86_64_GOTOFF64:
          case BFD_RELOC_X86_64_GOT64:
          case BFD_RELOC_X86_64_GOTPCREL64:
          case BFD_RELOC_X86_64_GOTPC64:
          case BFD_RELOC_X86_64_GOTPLT64:
          case BFD_RELOC_X86_64_PLTOFF64:
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("cannot represent relocation type %s in x32 mode"),
                          bfd_get_reloc_code_name (code));
            break;
          default:
            break;
          }

      if (!fixp->fx_pcrel)
        rel->addend = fixp->fx_offset;
      else
        switch (code)
          {
          /* For these types the addend is the fix offset less the size
             of the field.  */
          case BFD_RELOC_X86_64_PLT32:
          case BFD_RELOC_X86_64_GOT32:
          case BFD_RELOC_X86_64_GOTPCREL:
          case BFD_RELOC_X86_64_GOTPCRELX:
          case BFD_RELOC_X86_64_REX_GOTPCRELX:
          case BFD_RELOC_X86_64_TLSGD:
          case BFD_RELOC_X86_64_TLSLD:
          case BFD_RELOC_X86_64_GOTTPOFF:
          case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
          case BFD_RELOC_X86_64_TLSDESC_CALL:
            rel->addend = fixp->fx_offset - fixp->fx_size;
            break;
          default:
            rel->addend = (section->vma
                           - fixp->fx_size
                           + fixp->fx_addnumber
                           + md_pcrel_from (fixp));
            break;
          }
    }

  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("cannot represent relocation type %s"),
                    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}
13510
13511 #include "tc-i386-intel.c"
13512
13513 void
13514 tc_x86_parse_to_dw2regnum (expressionS *exp)
13515 {
13516   int saved_naked_reg;
13517   char saved_register_dot;
13518
13519   saved_naked_reg = allow_naked_reg;
13520   allow_naked_reg = 1;
13521   saved_register_dot = register_chars['.'];
13522   register_chars['.'] = '.';
13523   allow_pseudo_reg = 1;
13524   expression_and_evaluate (exp);
13525   allow_pseudo_reg = 0;
13526   register_chars['.'] = saved_register_dot;
13527   allow_naked_reg = saved_naked_reg;
13528
13529   if (exp->X_op == O_register && exp->X_add_number >= 0)
13530     {
13531       if ((addressT) exp->X_add_number < i386_regtab_size)
13532         {
13533           exp->X_op = O_constant;
13534           exp->X_add_number = i386_regtab[exp->X_add_number]
13535                               .dw2_regnum[flag_code >> 1];
13536         }
13537       else
13538         exp->X_op = O_illegal;
13539     }
13540 }
13541
13542 void
13543 tc_x86_frame_initial_instructions (void)
13544 {
13545   static unsigned int sp_regno[2];
13546
13547   if (!sp_regno[flag_code >> 1])
13548     {
13549       char *saved_input = input_line_pointer;
13550       char sp[][4] = {"esp", "rsp"};
13551       expressionS exp;
13552
13553       input_line_pointer = sp[flag_code >> 1];
13554       tc_x86_parse_to_dw2regnum (&exp);
13555       gas_assert (exp.X_op == O_constant);
13556       sp_regno[flag_code >> 1] = exp.X_add_number;
13557       input_line_pointer = saved_input;
13558     }
13559
13560   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
13561   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
13562 }
13563
13564 int
13565 x86_dwarf2_addr_size (void)
13566 {
13567 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13568   if (x86_elf_abi == X86_64_X32_ABI)
13569     return 4;
13570 #endif
13571   return bfd_arch_bits_per_address (stdoutput) / 8;
13572 }
13573
13574 int
13575 i386_elf_section_type (const char *str, size_t len)
13576 {
13577   if (flag_code == CODE_64BIT
13578       && len == sizeof ("unwind") - 1
13579       && strncmp (str, "unwind", 6) == 0)
13580     return SHT_X86_64_UNWIND;
13581
13582   return -1;
13583 }
13584
13585 #ifdef TE_SOLARIS
13586 void
13587 i386_solaris_fix_up_eh_frame (segT sec)
13588 {
13589   if (flag_code == CODE_64BIT)
13590     elf_section_type (sec) = SHT_X86_64_UNWIND;
13591 }
13592 #endif
13593
13594 #ifdef TE_PE
13595 void
13596 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
13597 {
13598   expressionS exp;
13599
13600   exp.X_op = O_secrel;
13601   exp.X_add_symbol = symbol;
13602   exp.X_add_number = 0;
13603   emit_expr (&exp, size);
13604 }
13605 #endif
13606
13607 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13608 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
13609
13610 bfd_vma
13611 x86_64_section_letter (int letter, const char **ptr_msg)
13612 {
13613   if (flag_code == CODE_64BIT)
13614     {
13615       if (letter == 'l')
13616         return SHF_X86_64_LARGE;
13617
13618       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
13619     }
13620   else
13621     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
13622   return -1;
13623 }
13624
13625 bfd_vma
13626 x86_64_section_word (char *str, size_t len)
13627 {
13628   if (len == 5 && flag_code == CODE_64BIT && CONST_STRNEQ (str, "large"))
13629     return SHF_X86_64_LARGE;
13630
13631   return -1;
13632 }
13633
13634 static void
13635 handle_large_common (int small ATTRIBUTE_UNUSED)
13636 {
13637   if (flag_code != CODE_64BIT)
13638     {
13639       s_comm_internal (0, elf_common_parse);
13640       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
13641     }
13642   else
13643     {
13644       static segT lbss_section;
13645       asection *saved_com_section_ptr = elf_com_section_ptr;
13646       asection *saved_bss_section = bss_section;
13647
13648       if (lbss_section == NULL)
13649         {
13650           flagword applicable;
13651           segT seg = now_seg;
13652           subsegT subseg = now_subseg;
13653
13654           /* The .lbss section is for local .largecomm symbols.  */
13655           lbss_section = subseg_new (".lbss", 0);
13656           applicable = bfd_applicable_section_flags (stdoutput);
13657           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
13658           seg_info (lbss_section)->bss = 1;
13659
13660           subseg_set (seg, subseg);
13661         }
13662
13663       elf_com_section_ptr = &_bfd_elf_large_com_section;
13664       bss_section = lbss_section;
13665
13666       s_comm_internal (0, elf_common_parse);
13667
13668       elf_com_section_ptr = saved_com_section_ptr;
13669       bss_section = saved_bss_section;
13670     }
13671 }
13672 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */