Diffstat (limited to 'contrib/gcc/config/ia64/ia64.c')
-rw-r--r-- | contrib/gcc/config/ia64/ia64.c | 9770
1 file changed, 0 insertions, 9770 deletions
diff --git a/contrib/gcc/config/ia64/ia64.c b/contrib/gcc/config/ia64/ia64.c
deleted file mode 100644
index 2bbc9a707c1f..000000000000
--- a/contrib/gcc/config/ia64/ia64.c
+++ /dev/null
@@ -1,9770 +0,0 @@
-/* Definitions of target machine for GNU compiler. - Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 - Free Software Foundation, Inc. - Contributed by James E. Wilson <wilson@cygnus.com> and - David Mosberger <davidm@hpl.hp.com>. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "rtl.h" -#include "tree.h" -#include "regs.h" -#include "hard-reg-set.h" -#include "real.h" -#include "insn-config.h" -#include "conditions.h" -#include "output.h" -#include "insn-attr.h" -#include "flags.h" -#include "recog.h" -#include "expr.h" -#include "optabs.h" -#include "except.h" -#include "function.h" -#include "ggc.h" -#include "basic-block.h" -#include "toplev.h" -#include "sched-int.h" -#include "timevar.h" -#include "target.h" -#include "target-def.h" -#include "tm_p.h" -#include "hashtab.h" -#include "langhooks.h" -#include "cfglayout.h" -#include "tree-gimple.h" -#include "intl.h" -#include "debug.h" -#include "params.h" - -/* This is used for communication between ASM_OUTPUT_LABEL and - ASM_OUTPUT_LABELREF. */ -int ia64_asm_output_label = 0; - -/* Define the information needed to generate branch and scc insns. This is - stored from the compare operation. */ -struct rtx_def * ia64_compare_op0; -struct rtx_def * ia64_compare_op1; - -/* Register names for ia64_expand_prologue. */ -static const char * const ia64_reg_numbers[96] = -{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", - "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", - "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", - "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", - "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", - "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", - "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", - "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", - "r96", "r97", "r98", "r99", "r100","r101","r102","r103", - "r104","r105","r106","r107","r108","r109","r110","r111", - "r112","r113","r114","r115","r116","r117","r118","r119", - "r120","r121","r122","r123","r124","r125","r126","r127"}; - -/* ??? These strings could be shared with REGISTER_NAMES. */ -static const char * const ia64_input_reg_names[8] = -{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; - -/* ??? These strings could be shared with REGISTER_NAMES. 
*/ -static const char * const ia64_local_reg_names[80] = -{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", - "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", - "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", - "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", - "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", - "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", - "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", - "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", - "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", - "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; - -/* ??? These strings could be shared with REGISTER_NAMES. */ -static const char * const ia64_output_reg_names[8] = -{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; - -/* Which cpu are we scheduling for. */ -enum processor_type ia64_tune = PROCESSOR_ITANIUM2; - -/* Determines whether we run our final scheduling pass or not. We always - avoid the normal second scheduling pass. */ -static int ia64_flag_schedule_insns2; - -/* Determines whether we run variable tracking in machine dependent - reorganization. */ -static int ia64_flag_var_tracking; - -/* Variables which are this size or smaller are put in the sdata/sbss - sections. */ - -unsigned int ia64_section_threshold; - -/* The following variable is used by the DFA insn scheduler. The value is - TRUE if we do insn bundling instead of insn scheduling. */ -int bundling_p = 0; - -/* Structure to be filled in by ia64_compute_frame_size with register - save masks and offsets for the current function. */ - -struct ia64_frame_info -{ - HOST_WIDE_INT total_size; /* size of the stack frame, not including - the caller's scratch area. */ - HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ - HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ - HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ - HARD_REG_SET mask; /* mask of saved registers. */ - unsigned int gr_used_mask; /* mask of registers in use as gr spill - registers or long-term scratches. */ - int n_spilled; /* number of spilled registers. */ - int reg_fp; /* register for fp. */ - int reg_save_b0; /* save register for b0. */ - int reg_save_pr; /* save register for prs. */ - int reg_save_ar_pfs; /* save register for ar.pfs. */ - int reg_save_ar_unat; /* save register for ar.unat. */ - int reg_save_ar_lc; /* save register for ar.lc. */ - int reg_save_gp; /* save register for gp. */ - int n_input_regs; /* number of input registers used. */ - int n_local_regs; /* number of local registers used. */ - int n_output_regs; /* number of output registers used. */ - int n_rotate_regs; /* number of rotating registers used. */ - - char need_regstk; /* true if a .regstk directive needed. */ - char initialized; /* true if the data is finalized. */ -}; - -/* Current frame information calculated by ia64_compute_frame_size. 
*/ -static struct ia64_frame_info current_frame_info; - -static int ia64_first_cycle_multipass_dfa_lookahead (void); -static void ia64_dependencies_evaluation_hook (rtx, rtx); -static void ia64_init_dfa_pre_cycle_insn (void); -static rtx ia64_dfa_pre_cycle_insn (void); -static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx); -static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx); -static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *); -static void ia64_h_i_d_extended (void); -static int ia64_mode_to_int (enum machine_mode); -static void ia64_set_sched_flags (spec_info_t); -static int ia64_speculate_insn (rtx, ds_t, rtx *); -static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool); -static bool ia64_needs_block_p (rtx); -static rtx ia64_gen_check (rtx, rtx, bool); -static int ia64_spec_check_p (rtx); -static int ia64_spec_check_src_p (rtx); -static rtx gen_tls_get_addr (void); -static rtx gen_thread_pointer (void); -static int find_gr_spill (int); -static int next_scratch_gr_reg (void); -static void mark_reg_gr_used_mask (rtx, void *); -static void ia64_compute_frame_size (HOST_WIDE_INT); -static void setup_spill_pointers (int, rtx, HOST_WIDE_INT); -static void finish_spill_pointers (void); -static rtx spill_restore_mem (rtx, HOST_WIDE_INT); -static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx); -static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT); -static rtx gen_movdi_x (rtx, rtx, rtx); -static rtx gen_fr_spill_x (rtx, rtx, rtx); -static rtx gen_fr_restore_x (rtx, rtx, rtx); - -static enum machine_mode hfa_element_mode (tree, bool); -static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, - tree, int *, int); -static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, - tree, bool); -static bool ia64_function_ok_for_sibcall (tree, tree); -static bool ia64_return_in_memory (tree, tree); -static bool ia64_rtx_costs (rtx, int, int, int *); -static void fix_range (const char *); -static bool ia64_handle_option (size_t, const char *, int); -static struct machine_function * ia64_init_machine_status (void); -static void emit_insn_group_barriers (FILE *); -static void emit_all_insn_group_barriers (FILE *); -static void final_emit_insn_group_barriers (FILE *); -static void emit_predicate_relation_info (void); -static void ia64_reorg (void); -static bool ia64_in_small_data_p (tree); -static void process_epilogue (FILE *, rtx, bool, bool); -static int process_set (FILE *, rtx, rtx, bool, bool); - -static bool ia64_assemble_integer (rtx, unsigned int, int); -static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT); -static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT); -static void ia64_output_function_end_prologue (FILE *); - -static int ia64_issue_rate (void); -static int ia64_adjust_cost_2 (rtx, int, rtx, int); -static void ia64_sched_init (FILE *, int, int); -static void ia64_sched_init_global (FILE *, int, int); -static void ia64_sched_finish_global (FILE *, int); -static void ia64_sched_finish (FILE *, int); -static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int); -static int ia64_sched_reorder (FILE *, int, rtx *, int *, int); -static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int); -static int ia64_variable_issue (FILE *, int, rtx, int); - -static struct bundle_state *get_free_bundle_state (void); -static void free_bundle_state (struct bundle_state *); -static void initiate_bundle_states (void); -static void finish_bundle_states (void); 
-static unsigned bundle_state_hash (const void *); -static int bundle_state_eq_p (const void *, const void *); -static int insert_bundle_state (struct bundle_state *); -static void initiate_bundle_state_table (void); -static void finish_bundle_state_table (void); -static int try_issue_nops (struct bundle_state *, int); -static int try_issue_insn (struct bundle_state *, rtx); -static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int); -static int get_max_pos (state_t); -static int get_template (state_t, int); - -static rtx get_next_important_insn (rtx, rtx); -static void bundling (FILE *, int, rtx, rtx); - -static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, - HOST_WIDE_INT, tree); -static void ia64_file_start (void); - -static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED; -static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED; -static section *ia64_select_rtx_section (enum machine_mode, rtx, - unsigned HOST_WIDE_INT); -static void ia64_output_dwarf_dtprel (FILE *, int, rtx) - ATTRIBUTE_UNUSED; -static unsigned int ia64_section_type_flags (tree, const char *, int); -static void ia64_init_libfuncs (void) - ATTRIBUTE_UNUSED; -static void ia64_hpux_init_libfuncs (void) - ATTRIBUTE_UNUSED; -static void ia64_sysv4_init_libfuncs (void) - ATTRIBUTE_UNUSED; -static void ia64_vms_init_libfuncs (void) - ATTRIBUTE_UNUSED; - -static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *); -static void ia64_encode_section_info (tree, rtx, int); -static rtx ia64_struct_value_rtx (tree, int); -static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *); -static bool ia64_scalar_mode_supported_p (enum machine_mode mode); -static bool ia64_vector_mode_supported_p (enum machine_mode mode); -static bool ia64_cannot_force_const_mem (rtx); -static const char *ia64_mangle_fundamental_type (tree); -static const char *ia64_invalid_conversion (tree, tree); -static const char *ia64_invalid_unary_op (int, tree); -static const char *ia64_invalid_binary_op (int, tree, tree); - -/* Table of valid machine attributes. */ -static const struct attribute_spec ia64_attribute_table[] = -{ - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ - { "syscall_linkage", 0, 0, false, true, true, NULL }, - { "model", 1, 1, true, false, false, ia64_handle_model_attribute }, - { NULL, 0, 0, false, false, false, NULL } -}; - -/* Initialize the GCC target structure. 
*/ -#undef TARGET_ATTRIBUTE_TABLE -#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table - -#undef TARGET_INIT_BUILTINS -#define TARGET_INIT_BUILTINS ia64_init_builtins - -#undef TARGET_EXPAND_BUILTIN -#define TARGET_EXPAND_BUILTIN ia64_expand_builtin - -#undef TARGET_ASM_BYTE_OP -#define TARGET_ASM_BYTE_OP "\tdata1\t" -#undef TARGET_ASM_ALIGNED_HI_OP -#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t" -#undef TARGET_ASM_ALIGNED_SI_OP -#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t" -#undef TARGET_ASM_ALIGNED_DI_OP -#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t" -#undef TARGET_ASM_UNALIGNED_HI_OP -#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t" -#undef TARGET_ASM_UNALIGNED_SI_OP -#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t" -#undef TARGET_ASM_UNALIGNED_DI_OP -#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t" -#undef TARGET_ASM_INTEGER -#define TARGET_ASM_INTEGER ia64_assemble_integer - -#undef TARGET_ASM_FUNCTION_PROLOGUE -#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue -#undef TARGET_ASM_FUNCTION_END_PROLOGUE -#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue -#undef TARGET_ASM_FUNCTION_EPILOGUE -#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue - -#undef TARGET_IN_SMALL_DATA_P -#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p - -#undef TARGET_SCHED_ADJUST_COST_2 -#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2 -#undef TARGET_SCHED_ISSUE_RATE -#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate -#undef TARGET_SCHED_VARIABLE_ISSUE -#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue -#undef TARGET_SCHED_INIT -#define TARGET_SCHED_INIT ia64_sched_init -#undef TARGET_SCHED_FINISH -#define TARGET_SCHED_FINISH ia64_sched_finish -#undef TARGET_SCHED_INIT_GLOBAL -#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global -#undef TARGET_SCHED_FINISH_GLOBAL -#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global -#undef TARGET_SCHED_REORDER -#define TARGET_SCHED_REORDER ia64_sched_reorder -#undef TARGET_SCHED_REORDER2 -#define TARGET_SCHED_REORDER2 ia64_sched_reorder2 - -#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK -#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook - -#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD -#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead - -#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN -#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn -#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN -#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn - -#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD -#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\ - ia64_first_cycle_multipass_dfa_lookahead_guard - -#undef TARGET_SCHED_DFA_NEW_CYCLE -#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle - -#undef TARGET_SCHED_H_I_D_EXTENDED -#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended - -#undef TARGET_SCHED_SET_SCHED_FLAGS -#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags - -#undef TARGET_SCHED_SPECULATE_INSN -#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn - -#undef TARGET_SCHED_NEEDS_BLOCK_P -#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p - -#undef TARGET_SCHED_GEN_CHECK -#define TARGET_SCHED_GEN_CHECK ia64_gen_check - -#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC -#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\ - ia64_first_cycle_multipass_dfa_lookahead_guard_spec - -#undef TARGET_FUNCTION_OK_FOR_SIBCALL 
-#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall -#undef TARGET_ARG_PARTIAL_BYTES -#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes - -#undef TARGET_ASM_OUTPUT_MI_THUNK -#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk -#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK -#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true - -#undef TARGET_ASM_FILE_START -#define TARGET_ASM_FILE_START ia64_file_start - -#undef TARGET_RTX_COSTS -#define TARGET_RTX_COSTS ia64_rtx_costs -#undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 - -#undef TARGET_MACHINE_DEPENDENT_REORG -#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg - -#undef TARGET_ENCODE_SECTION_INFO -#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info - -#undef TARGET_SECTION_TYPE_FLAGS -#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags - -#ifdef HAVE_AS_TLS -#undef TARGET_ASM_OUTPUT_DWARF_DTPREL -#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel -#endif - -/* ??? ABI doesn't allow us to define this. */ -#if 0 -#undef TARGET_PROMOTE_FUNCTION_ARGS -#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true -#endif - -/* ??? ABI doesn't allow us to define this. */ -#if 0 -#undef TARGET_PROMOTE_FUNCTION_RETURN -#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true -#endif - -/* ??? Investigate. */ -#if 0 -#undef TARGET_PROMOTE_PROTOTYPES -#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true -#endif - -#undef TARGET_STRUCT_VALUE_RTX -#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx -#undef TARGET_RETURN_IN_MEMORY -#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory -#undef TARGET_SETUP_INCOMING_VARARGS -#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs -#undef TARGET_STRICT_ARGUMENT_NAMING -#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true -#undef TARGET_MUST_PASS_IN_STACK -#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size - -#undef TARGET_GIMPLIFY_VA_ARG_EXPR -#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg - -#undef TARGET_UNWIND_EMIT -#define TARGET_UNWIND_EMIT process_for_unwind_directive - -#undef TARGET_SCALAR_MODE_SUPPORTED_P -#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p -#undef TARGET_VECTOR_MODE_SUPPORTED_P -#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p - -/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur - in an order different from the specified program order. 
*/ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - -#undef TARGET_DEFAULT_TARGET_FLAGS -#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT) -#undef TARGET_HANDLE_OPTION -#define TARGET_HANDLE_OPTION ia64_handle_option - -#undef TARGET_CANNOT_FORCE_CONST_MEM -#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem - -#undef TARGET_MANGLE_FUNDAMENTAL_TYPE -#define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type - -#undef TARGET_INVALID_CONVERSION -#define TARGET_INVALID_CONVERSION ia64_invalid_conversion -#undef TARGET_INVALID_UNARY_OP -#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op -#undef TARGET_INVALID_BINARY_OP -#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op - -struct gcc_target targetm = TARGET_INITIALIZER; - -typedef enum - { - ADDR_AREA_NORMAL, /* normal address area */ - ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */ - } -ia64_addr_area; - -static GTY(()) tree small_ident1; -static GTY(()) tree small_ident2; - -static void -init_idents (void) -{ - if (small_ident1 == 0) - { - small_ident1 = get_identifier ("small"); - small_ident2 = get_identifier ("__small__"); - } -} - -/* Retrieve the address area that has been chosen for the given decl. */ - -static ia64_addr_area -ia64_get_addr_area (tree decl) -{ - tree model_attr; - - model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl)); - if (model_attr) - { - tree id; - - init_idents (); - id = TREE_VALUE (TREE_VALUE (model_attr)); - if (id == small_ident1 || id == small_ident2) - return ADDR_AREA_SMALL; - } - return ADDR_AREA_NORMAL; -} - -static tree -ia64_handle_model_attribute (tree *node, tree name, tree args, - int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) -{ - ia64_addr_area addr_area = ADDR_AREA_NORMAL; - ia64_addr_area area; - tree arg, decl = *node; - - init_idents (); - arg = TREE_VALUE (args); - if (arg == small_ident1 || arg == small_ident2) - { - addr_area = ADDR_AREA_SMALL; - } - else - { - warning (OPT_Wattributes, "invalid argument of %qs attribute", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - } - - switch (TREE_CODE (decl)) - { - case VAR_DECL: - if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) - == FUNCTION_DECL) - && !TREE_STATIC (decl)) - { - error ("%Jan address area attribute cannot be specified for " - "local variables", decl); - *no_add_attrs = true; - } - area = ia64_get_addr_area (decl); - if (area != ADDR_AREA_NORMAL && addr_area != area) - { - error ("address area of %q+D conflicts with previous " - "declaration", decl); - *no_add_attrs = true; - } - break; - - case FUNCTION_DECL: - error ("%Jaddress area attribute cannot be specified for functions", - decl); - *no_add_attrs = true; - break; - - default: - warning (OPT_Wattributes, "%qs attribute ignored", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - break; - } - - return NULL_TREE; -} - -static void -ia64_encode_addr_area (tree decl, rtx symbol) -{ - int flags; - - flags = SYMBOL_REF_FLAGS (symbol); - switch (ia64_get_addr_area (decl)) - { - case ADDR_AREA_NORMAL: break; - case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break; - default: gcc_unreachable (); - } - SYMBOL_REF_FLAGS (symbol) = flags; -} - -static void -ia64_encode_section_info (tree decl, rtx rtl, int first) -{ - default_encode_section_info (decl, rtl, first); - - /* Careful not to prod global register variables. 
*/ - if (TREE_CODE (decl) == VAR_DECL - && GET_CODE (DECL_RTL (decl)) == MEM - && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))) - ia64_encode_addr_area (decl, XEXP (rtl, 0)); -} - -/* Implement CONST_OK_FOR_LETTER_P. */ - -bool -ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c) -{ - switch (c) - { - case 'I': - return CONST_OK_FOR_I (value); - case 'J': - return CONST_OK_FOR_J (value); - case 'K': - return CONST_OK_FOR_K (value); - case 'L': - return CONST_OK_FOR_L (value); - case 'M': - return CONST_OK_FOR_M (value); - case 'N': - return CONST_OK_FOR_N (value); - case 'O': - return CONST_OK_FOR_O (value); - case 'P': - return CONST_OK_FOR_P (value); - default: - return false; - } -} - -/* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */ - -bool -ia64_const_double_ok_for_letter_p (rtx value, char c) -{ - switch (c) - { - case 'G': - return CONST_DOUBLE_OK_FOR_G (value); - default: - return false; - } -} - -/* Implement EXTRA_CONSTRAINT. */ - -bool -ia64_extra_constraint (rtx value, char c) -{ - switch (c) - { - case 'Q': - /* Non-volatile memory for FP_REG loads/stores. */ - return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value); - - case 'R': - /* 1..4 for shladd arguments. */ - return (GET_CODE (value) == CONST_INT - && INTVAL (value) >= 1 && INTVAL (value) <= 4); - - case 'S': - /* Non-post-inc memory for asms and other unsavory creatures. */ - return (GET_CODE (value) == MEM - && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC - && (reload_in_progress || memory_operand (value, VOIDmode))); - - case 'T': - /* Symbol ref to small-address-area. */ - return small_addr_symbolic_operand (value, VOIDmode); - - case 'U': - /* Vector zero. */ - return value == CONST0_RTX (GET_MODE (value)); - - case 'W': - /* An integer vector, such that conversion to an integer yields a - value appropriate for an integer 'J' constraint. */ - if (GET_CODE (value) == CONST_VECTOR - && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT) - { - value = simplify_subreg (DImode, value, GET_MODE (value), 0); - return ia64_const_ok_for_letter_p (INTVAL (value), 'J'); - } - return false; - - case 'Y': - /* A V2SF vector containing elements that satisfy 'G'. */ - return - (GET_CODE (value) == CONST_VECTOR - && GET_MODE (value) == V2SFmode - && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G') - && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G')); - - default: - return false; - } -} - -/* Return 1 if the operands of a move are ok. */ - -int -ia64_move_ok (rtx dst, rtx src) -{ - /* If we're under init_recog_no_volatile, we'll not be able to use - memory_operand. So check the code directly and don't worry about - the validity of the underlying address, which should have been - checked elsewhere anyway. */ - if (GET_CODE (dst) != MEM) - return 1; - if (GET_CODE (src) == MEM) - return 0; - if (register_operand (src, VOIDmode)) - return 1; - - /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0. */ - if (INTEGRAL_MODE_P (GET_MODE (dst))) - return src == const0_rtx; - else - return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src); -} - -/* Return 1 if the operands are ok for a floating point load pair. 
*/ - -int -ia64_load_pair_ok (rtx dst, rtx src) -{ - if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst))) - return 0; - if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src)) - return 0; - switch (GET_CODE (XEXP (src, 0))) - { - case REG: - case POST_INC: - break; - case POST_DEC: - return 0; - case POST_MODIFY: - { - rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1); - - if (GET_CODE (adjust) != CONST_INT - || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src))) - return 0; - } - break; - default: - abort (); - } - return 1; -} - -int -addp4_optimize_ok (rtx op1, rtx op2) -{ - return (basereg_operand (op1, GET_MODE(op1)) != - basereg_operand (op2, GET_MODE(op2))); -} - -/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction. - Return the length of the field, or <= 0 on failure. */ - -int -ia64_depz_field_mask (rtx rop, rtx rshift) -{ - unsigned HOST_WIDE_INT op = INTVAL (rop); - unsigned HOST_WIDE_INT shift = INTVAL (rshift); - - /* Get rid of the zero bits we're shifting in. */ - op >>= shift; - - /* We must now have a solid block of 1's at bit 0. */ - return exact_log2 (op + 1); -} - -/* Return the TLS model to use for ADDR. */ - -static enum tls_model -tls_symbolic_operand_type (rtx addr) -{ - enum tls_model tls_kind = 0; - - if (GET_CODE (addr) == CONST) - { - if (GET_CODE (XEXP (addr, 0)) == PLUS - && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF) - tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0)); - } - else if (GET_CODE (addr) == SYMBOL_REF) - tls_kind = SYMBOL_REF_TLS_MODEL (addr); - - return tls_kind; -} - -/* Return true if X is a constant that is valid for some immediate - field in an instruction. */ - -bool -ia64_legitimate_constant_p (rtx x) -{ - switch (GET_CODE (x)) - { - case CONST_INT: - case LABEL_REF: - return true; - - case CONST_DOUBLE: - if (GET_MODE (x) == VOIDmode) - return true; - return CONST_DOUBLE_OK_FOR_G (x); - - case CONST: - case SYMBOL_REF: - /* ??? Short term workaround for PR 28490. We must make the code here - match the code in ia64_expand_move and move_operand, even though they - are both technically wrong. */ - if (tls_symbolic_operand_type (x) == 0) - { - HOST_WIDE_INT addend = 0; - rtx op = x; - - if (GET_CODE (op) == CONST - && GET_CODE (XEXP (op, 0)) == PLUS - && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) - { - addend = INTVAL (XEXP (XEXP (op, 0), 1)); - op = XEXP (XEXP (op, 0), 0); - } - - if (any_offset_symbol_operand (op, GET_MODE (op)) - || function_operand (op, GET_MODE (op))) - return true; - if (aligned_offset_symbol_operand (op, GET_MODE (op))) - return (addend & 0x3fff) == 0; - return false; - } - return false; - - case CONST_VECTOR: - { - enum machine_mode mode = GET_MODE (x); - - if (mode == V2SFmode) - return ia64_extra_constraint (x, 'Y'); - - return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT - && GET_MODE_SIZE (mode) <= 8); - } - - default: - return false; - } -} - -/* Don't allow TLS addresses to get spilled to memory. */ - -static bool -ia64_cannot_force_const_mem (rtx x) -{ - return tls_symbolic_operand_type (x) != 0; -} - -/* Expand a symbolic constant load. */ - -bool -ia64_expand_load_address (rtx dest, rtx src) -{ - gcc_assert (GET_CODE (dest) == REG); - - /* ILP32 mode still loads 64-bits of data from the GOT. This avoids - having to pointer-extend the value afterward. Other forms of address - computation below are also more natural to compute as 64-bit quantities. - If we've been given an SImode destination register, change it. 
*/ - if (GET_MODE (dest) != Pmode) - dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0); - - if (TARGET_NO_PIC) - return false; - if (small_addr_symbolic_operand (src, VOIDmode)) - return false; - - if (TARGET_AUTO_PIC) - emit_insn (gen_load_gprel64 (dest, src)); - else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src)) - emit_insn (gen_load_fptr (dest, src)); - else if (sdata_symbolic_operand (src, VOIDmode)) - emit_insn (gen_load_gprel (dest, src)); - else - { - HOST_WIDE_INT addend = 0; - rtx tmp; - - /* We did split constant offsets in ia64_expand_move, and we did try - to keep them split in move_operand, but we also allowed reload to - rematerialize arbitrary constants rather than spill the value to - the stack and reload it. So we have to be prepared here to split - them apart again. */ - if (GET_CODE (src) == CONST) - { - HOST_WIDE_INT hi, lo; - - hi = INTVAL (XEXP (XEXP (src, 0), 1)); - lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000; - hi = hi - lo; - - if (lo != 0) - { - addend = lo; - src = plus_constant (XEXP (XEXP (src, 0), 0), hi); - } - } - - tmp = gen_rtx_HIGH (Pmode, src); - tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx); - emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); - - tmp = gen_rtx_LO_SUM (Pmode, dest, src); - emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); - - if (addend) - { - tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend)); - emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); - } - } - - return true; -} - -static GTY(()) rtx gen_tls_tga; -static rtx -gen_tls_get_addr (void) -{ - if (!gen_tls_tga) - gen_tls_tga = init_one_libfunc ("__tls_get_addr"); - return gen_tls_tga; -} - -static GTY(()) rtx thread_pointer_rtx; -static rtx -gen_thread_pointer (void) -{ - if (!thread_pointer_rtx) - thread_pointer_rtx = gen_rtx_REG (Pmode, 13); - return thread_pointer_rtx; -} - -static rtx -ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1, - rtx orig_op1, HOST_WIDE_INT addend) -{ - rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns; - rtx orig_op0 = op0; - HOST_WIDE_INT addend_lo, addend_hi; - - switch (tls_kind) - { - case TLS_MODEL_GLOBAL_DYNAMIC: - start_sequence (); - - tga_op1 = gen_reg_rtx (Pmode); - emit_insn (gen_load_dtpmod (tga_op1, op1)); - - tga_op2 = gen_reg_rtx (Pmode); - emit_insn (gen_load_dtprel (tga_op2, op1)); - - tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, - LCT_CONST, Pmode, 2, tga_op1, - Pmode, tga_op2, Pmode); - - insns = get_insns (); - end_sequence (); - - if (GET_MODE (op0) != Pmode) - op0 = tga_ret; - emit_libcall_block (insns, op0, tga_ret, op1); - break; - - case TLS_MODEL_LOCAL_DYNAMIC: - /* ??? This isn't the completely proper way to do local-dynamic. - If the call to __tls_get_addr is used only by a single symbol, - then we should (somehow) move the dtprel to the second arg - to avoid the extra add. 
*/ - start_sequence (); - - tga_op1 = gen_reg_rtx (Pmode); - emit_insn (gen_load_dtpmod (tga_op1, op1)); - - tga_op2 = const0_rtx; - - tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, - LCT_CONST, Pmode, 2, tga_op1, - Pmode, tga_op2, Pmode); - - insns = get_insns (); - end_sequence (); - - tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), - UNSPEC_LD_BASE); - tmp = gen_reg_rtx (Pmode); - emit_libcall_block (insns, tmp, tga_ret, tga_eqv); - - if (!register_operand (op0, Pmode)) - op0 = gen_reg_rtx (Pmode); - if (TARGET_TLS64) - { - emit_insn (gen_load_dtprel (op0, op1)); - emit_insn (gen_adddi3 (op0, tmp, op0)); - } - else - emit_insn (gen_add_dtprel (op0, op1, tmp)); - break; - - case TLS_MODEL_INITIAL_EXEC: - addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; - addend_hi = addend - addend_lo; - - op1 = plus_constant (op1, addend_hi); - addend = addend_lo; - - tmp = gen_reg_rtx (Pmode); - emit_insn (gen_load_tprel (tmp, op1)); - - if (!register_operand (op0, Pmode)) - op0 = gen_reg_rtx (Pmode); - emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ())); - break; - - case TLS_MODEL_LOCAL_EXEC: - if (!register_operand (op0, Pmode)) - op0 = gen_reg_rtx (Pmode); - - op1 = orig_op1; - addend = 0; - if (TARGET_TLS64) - { - emit_insn (gen_load_tprel (op0, op1)); - emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ())); - } - else - emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ())); - break; - - default: - gcc_unreachable (); - } - - if (addend) - op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend), - orig_op0, 1, OPTAB_DIRECT); - if (orig_op0 == op0) - return NULL_RTX; - if (GET_MODE (orig_op0) == Pmode) - return op0; - return gen_lowpart (GET_MODE (orig_op0), op0); -} - -rtx -ia64_expand_move (rtx op0, rtx op1) -{ - enum machine_mode mode = GET_MODE (op0); - - if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1)) - op1 = force_reg (mode, op1); - - if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode)) - { - HOST_WIDE_INT addend = 0; - enum tls_model tls_kind; - rtx sym = op1; - - if (GET_CODE (op1) == CONST - && GET_CODE (XEXP (op1, 0)) == PLUS - && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT) - { - addend = INTVAL (XEXP (XEXP (op1, 0), 1)); - sym = XEXP (XEXP (op1, 0), 0); - } - - tls_kind = tls_symbolic_operand_type (sym); - if (tls_kind) - return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend); - - if (any_offset_symbol_operand (sym, mode)) - addend = 0; - else if (aligned_offset_symbol_operand (sym, mode)) - { - HOST_WIDE_INT addend_lo, addend_hi; - - addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; - addend_hi = addend - addend_lo; - - if (addend_lo != 0) - { - op1 = plus_constant (sym, addend_hi); - addend = addend_lo; - } - else - addend = 0; - } - else - op1 = sym; - - if (reload_completed) - { - /* We really should have taken care of this offset earlier. */ - gcc_assert (addend == 0); - if (ia64_expand_load_address (op0, op1)) - return NULL_RTX; - } - - if (addend) - { - rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode); - - emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1)); - - op1 = expand_simple_binop (mode, PLUS, subtarget, - GEN_INT (addend), op0, 1, OPTAB_DIRECT); - if (op0 == op1) - return NULL_RTX; - } - } - - return op1; -} - -/* Split a move from OP1 to OP0 conditional on COND. 
*/ - -void -ia64_emit_cond_move (rtx op0, rtx op1, rtx cond) -{ - rtx insn, first = get_last_insn (); - - emit_move_insn (op0, op1); - - for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn)) - if (INSN_P (insn)) - PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), - PATTERN (insn)); -} - -/* Split a post-reload TImode or TFmode reference into two DImode - components. This is made extra difficult by the fact that we do - not get any scratch registers to work with, because reload cannot - be prevented from giving us a scratch that overlaps the register - pair involved. So instead, when addressing memory, we tweak the - pointer register up and back down with POST_INCs. Or up and not - back down when we can get away with it. - - REVERSED is true when the loads must be done in reversed order - (high word first) for correctness. DEAD is true when the pointer - dies with the second insn we generate and therefore the second - address must not carry a postmodify. - - May return an insn which is to be emitted after the moves. */ - -static rtx -ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead) -{ - rtx fixup = 0; - - switch (GET_CODE (in)) - { - case REG: - out[reversed] = gen_rtx_REG (DImode, REGNO (in)); - out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1); - break; - - case CONST_INT: - case CONST_DOUBLE: - /* Cannot occur reversed. */ - gcc_assert (!reversed); - - if (GET_MODE (in) != TFmode) - split_double (in, &out[0], &out[1]); - else - /* split_double does not understand how to split a TFmode - quantity into a pair of DImode constants. */ - { - REAL_VALUE_TYPE r; - unsigned HOST_WIDE_INT p[2]; - long l[4]; /* TFmode is 128 bits */ - - REAL_VALUE_FROM_CONST_DOUBLE (r, in); - real_to_target (l, &r, TFmode); - - if (FLOAT_WORDS_BIG_ENDIAN) - { - p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1]; - p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3]; - } - else - { - p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2]; - p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0]; - } - out[0] = GEN_INT (p[0]); - out[1] = GEN_INT (p[1]); - } - break; - - case MEM: - { - rtx base = XEXP (in, 0); - rtx offset; - - switch (GET_CODE (base)) - { - case REG: - if (!reversed) - { - out[0] = adjust_automodify_address - (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); - out[1] = adjust_automodify_address - (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8); - } - else - { - /* Reversal requires a pre-increment, which can only - be done as a separate insn. */ - emit_insn (gen_adddi3 (base, base, GEN_INT (8))); - out[0] = adjust_automodify_address - (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8); - out[1] = adjust_address (in, DImode, 0); - } - break; - - case POST_INC: - gcc_assert (!reversed && !dead); - - /* Just do the increment in two steps. */ - out[0] = adjust_automodify_address (in, DImode, 0, 0); - out[1] = adjust_automodify_address (in, DImode, 0, 8); - break; - - case POST_DEC: - gcc_assert (!reversed && !dead); - - /* Add 8, subtract 24. */ - base = XEXP (base, 0); - out[0] = adjust_automodify_address - (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); - out[1] = adjust_automodify_address - (in, DImode, - gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)), - 8); - break; - - case POST_MODIFY: - gcc_assert (!reversed && !dead); - - /* Extract and adjust the modification. 
This case is - trickier than the others, because we might have an - index register, or we might have a combined offset that - doesn't fit a signed 9-bit displacement field. We can - assume the incoming expression is already legitimate. */ - offset = XEXP (base, 1); - base = XEXP (base, 0); - - out[0] = adjust_automodify_address - (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); - - if (GET_CODE (XEXP (offset, 1)) == REG) - { - /* Can't adjust the postmodify to match. Emit the - original, then a separate addition insn. */ - out[1] = adjust_automodify_address (in, DImode, 0, 8); - fixup = gen_adddi3 (base, base, GEN_INT (-8)); - } - else - { - gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT); - if (INTVAL (XEXP (offset, 1)) < -256 + 8) - { - /* Again the postmodify cannot be made to match, - but in this case it's more efficient to get rid - of the postmodify entirely and fix up with an - add insn. */ - out[1] = adjust_automodify_address (in, DImode, base, 8); - fixup = gen_adddi3 - (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8)); - } - else - { - /* Combined offset still fits in the displacement field. - (We cannot overflow it at the high end.) */ - out[1] = adjust_automodify_address - (in, DImode, gen_rtx_POST_MODIFY - (Pmode, base, gen_rtx_PLUS - (Pmode, base, - GEN_INT (INTVAL (XEXP (offset, 1)) - 8))), - 8); - } - } - break; - - default: - gcc_unreachable (); - } - break; - } - - default: - gcc_unreachable (); - } - - return fixup; -} - -/* Split a TImode or TFmode move instruction after reload. - This is used by *movtf_internal and *movti_internal. */ -void -ia64_split_tmode_move (rtx operands[]) -{ - rtx in[2], out[2], insn; - rtx fixup[2]; - bool dead = false; - bool reversed = false; - - /* It is possible for reload to decide to overwrite a pointer with - the value it points to. In that case we have to do the loads in - the appropriate order so that the pointer is not destroyed too - early. Also we must not generate a postmodify for that second - load, or rws_access_regno will die. */ - if (GET_CODE (operands[1]) == MEM - && reg_overlap_mentioned_p (operands[0], operands[1])) - { - rtx base = XEXP (operands[1], 0); - while (GET_CODE (base) != REG) - base = XEXP (base, 0); - - if (REGNO (base) == REGNO (operands[0])) - reversed = true; - dead = true; - } - /* Another reason to do the moves in reversed order is if the first - element of the target register pair is also the second element of - the source register pair. */ - if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG - && REGNO (operands[0]) == REGNO (operands[1]) + 1) - reversed = true; - - fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead); - fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead); - -#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \ - if (GET_CODE (EXP) == MEM \ - && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \ - || GET_CODE (XEXP (EXP, 0)) == POST_INC \ - || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \ - REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC, \ - XEXP (XEXP (EXP, 0), 0), \ - REG_NOTES (INSN)) - - insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0])); - MAYBE_ADD_REG_INC_NOTE (insn, in[0]); - MAYBE_ADD_REG_INC_NOTE (insn, out[0]); - - insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1])); - MAYBE_ADD_REG_INC_NOTE (insn, in[1]); - MAYBE_ADD_REG_INC_NOTE (insn, out[1]); - - if (fixup[0]) - emit_insn (fixup[0]); - if (fixup[1]) - emit_insn (fixup[1]); - -#undef MAYBE_ADD_REG_INC_NOTE -} - -/* ??? Fixing GR->FR XFmode moves during reload is hard. 
You need to go - through memory plus an extra GR scratch register. Except that you can - either get the first from SECONDARY_MEMORY_NEEDED or the second from - SECONDARY_RELOAD_CLASS, but not both. - - We got into problems in the first place by allowing a construct like - (subreg:XF (reg:TI)), which we got from a union containing a long double. - This solution attempts to prevent this situation from occurring. When - we see something like the above, we spill the inner register to memory. */ - -static rtx -spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode) -{ - if (GET_CODE (in) == SUBREG - && GET_MODE (SUBREG_REG (in)) == TImode - && GET_CODE (SUBREG_REG (in)) == REG) - { - rtx memt = assign_stack_temp (TImode, 16, 0); - emit_move_insn (memt, SUBREG_REG (in)); - return adjust_address (memt, mode, 0); - } - else if (force && GET_CODE (in) == REG) - { - rtx memx = assign_stack_temp (mode, 16, 0); - emit_move_insn (memx, in); - return memx; - } - else - return in; -} - -/* Expand the movxf or movrf pattern (MODE says which) with the given - OPERANDS, returning true if the pattern should then invoke - DONE. */ - -bool -ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[]) -{ - rtx op0 = operands[0]; - - if (GET_CODE (op0) == SUBREG) - op0 = SUBREG_REG (op0); - - /* We must support XFmode loads into general registers for stdarg/vararg, - unprototyped calls, and a rare case where a long double is passed as - an argument after a float HFA fills the FP registers. We split them into - DImode loads for convenience. We also need to support XFmode stores - for the last case. This case does not happen for stdarg/vararg routines, - because we do a block store to memory of unnamed arguments. */ - - if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0))) - { - rtx out[2]; - - /* We're hoping to transform everything that deals with XFmode - quantities and GR registers early in the compiler. */ - gcc_assert (!no_new_pseudos); - - /* Struct to register can just use TImode instead. */ - if ((GET_CODE (operands[1]) == SUBREG - && GET_MODE (SUBREG_REG (operands[1])) == TImode) - || (GET_CODE (operands[1]) == REG - && GR_REGNO_P (REGNO (operands[1])))) - { - rtx op1 = operands[1]; - - if (GET_CODE (op1) == SUBREG) - op1 = SUBREG_REG (op1); - else - op1 = gen_rtx_REG (TImode, REGNO (op1)); - - emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1); - return true; - } - - if (GET_CODE (operands[1]) == CONST_DOUBLE) - { - /* Don't word-swap when reading in the constant. */ - emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)), - operand_subword (operands[1], WORDS_BIG_ENDIAN, - 0, mode)); - emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1), - operand_subword (operands[1], !WORDS_BIG_ENDIAN, - 0, mode)); - return true; - } - - /* If the quantity is in a register not known to be GR, spill it. */ - if (register_operand (operands[1], mode)) - operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode); - - gcc_assert (GET_CODE (operands[1]) == MEM); - - /* Don't word-swap when reading in the value. */ - out[0] = gen_rtx_REG (DImode, REGNO (op0)); - out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1); - - emit_move_insn (out[0], adjust_address (operands[1], DImode, 0)); - emit_move_insn (out[1], adjust_address (operands[1], DImode, 8)); - return true; - } - - if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1]))) - { - /* We're hoping to transform everything that deals with XFmode - quantities and GR registers early in the compiler. 
*/ - gcc_assert (!no_new_pseudos); - - /* Op0 can't be a GR_REG here, as that case is handled above. - If op0 is a register, then we spill op1, so that we now have a - MEM operand. This requires creating an XFmode subreg of a TImode reg - to force the spill. */ - if (register_operand (operands[0], mode)) - { - rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); - op1 = gen_rtx_SUBREG (mode, op1, 0); - operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode); - } - - else - { - rtx in[2]; - - gcc_assert (GET_CODE (operands[0]) == MEM); - - /* Don't word-swap when writing out the value. */ - in[0] = gen_rtx_REG (DImode, REGNO (operands[1])); - in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); - - emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]); - emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]); - return true; - } - } - - if (!reload_in_progress && !reload_completed) - { - operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode); - - if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG) - { - rtx memt, memx, in = operands[1]; - if (CONSTANT_P (in)) - in = validize_mem (force_const_mem (mode, in)); - if (GET_CODE (in) == MEM) - memt = adjust_address (in, TImode, 0); - else - { - memt = assign_stack_temp (TImode, 16, 0); - memx = adjust_address (memt, mode, 0); - emit_move_insn (memx, in); - } - emit_move_insn (op0, memt); - return true; - } - - if (!ia64_move_ok (operands[0], operands[1])) - operands[1] = force_reg (mode, operands[1]); - } - - return false; -} - -/* Emit comparison instruction if necessary, returning the expression - that holds the compare result in the proper mode. */ - -static GTY(()) rtx cmptf_libfunc; - -rtx -ia64_expand_compare (enum rtx_code code, enum machine_mode mode) -{ - rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1; - rtx cmp; - - /* If we have a BImode input, then we already have a compare result, and - do not need to emit another comparison. */ - if (GET_MODE (op0) == BImode) - { - gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx); - cmp = op0; - } - /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a - magic number as its third argument, that indicates what to do. - The return value is an integer to be compared against zero. */ - else if (GET_MODE (op0) == TFmode) - { - enum qfcmp_magic { - QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */ - QCMP_UNORD = 2, - QCMP_EQ = 4, - QCMP_LT = 8, - QCMP_GT = 16 - } magic; - enum rtx_code ncode; - rtx ret, insns; - - gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode); - switch (code) - { - /* 1 = equal, 0 = not equal. Equality operators do - not raise FP_INVALID when given an SNaN operand. */ - case EQ: magic = QCMP_EQ; ncode = NE; break; - case NE: magic = QCMP_EQ; ncode = EQ; break; - /* isunordered() from C99. */ - case UNORDERED: magic = QCMP_UNORD; ncode = NE; break; - case ORDERED: magic = QCMP_UNORD; ncode = EQ; break; - /* Relational operators raise FP_INVALID when given - an SNaN operand. */ - case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break; - case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break; - case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break; - case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break; - /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT. - Expanders for buneq etc. would have to be added to ia64.md - for this to be useful. 
*/ - default: gcc_unreachable (); - } - - start_sequence (); - - ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3, - op0, TFmode, op1, TFmode, - GEN_INT (magic), DImode); - cmp = gen_reg_rtx (BImode); - emit_insn (gen_rtx_SET (VOIDmode, cmp, - gen_rtx_fmt_ee (ncode, BImode, - ret, const0_rtx))); - - insns = get_insns (); - end_sequence (); - - emit_libcall_block (insns, cmp, cmp, - gen_rtx_fmt_ee (code, BImode, op0, op1)); - code = NE; - } - else - { - cmp = gen_reg_rtx (BImode); - emit_insn (gen_rtx_SET (VOIDmode, cmp, - gen_rtx_fmt_ee (code, BImode, op0, op1))); - code = NE; - } - - return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx); -} - -/* Generate an integral vector comparison. Return true if the condition has - been reversed, and so the sense of the comparison should be inverted. */ - -static bool -ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode, - rtx dest, rtx op0, rtx op1) -{ - bool negate = false; - rtx x; - - /* Canonicalize the comparison to EQ, GT, GTU. */ - switch (code) - { - case EQ: - case GT: - case GTU: - break; - - case NE: - case LE: - case LEU: - code = reverse_condition (code); - negate = true; - break; - - case GE: - case GEU: - code = reverse_condition (code); - negate = true; - /* FALLTHRU */ - - case LT: - case LTU: - code = swap_condition (code); - x = op0, op0 = op1, op1 = x; - break; - - default: - gcc_unreachable (); - } - - /* Unsigned parallel compare is not supported by the hardware. Play some - tricks to turn this into a signed comparison against 0. */ - if (code == GTU) - { - switch (mode) - { - case V2SImode: - { - rtx t1, t2, mask; - - /* Perform a parallel modulo subtraction. */ - t1 = gen_reg_rtx (V2SImode); - emit_insn (gen_subv2si3 (t1, op0, op1)); - - /* Extract the original sign bit of op0. */ - mask = GEN_INT (-0x80000000); - mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask)); - mask = force_reg (V2SImode, mask); - t2 = gen_reg_rtx (V2SImode); - emit_insn (gen_andv2si3 (t2, op0, mask)); - - /* XOR it back into the result of the subtraction. This results - in the sign bit set iff we saw unsigned underflow. */ - x = gen_reg_rtx (V2SImode); - emit_insn (gen_xorv2si3 (x, t1, t2)); - - code = GT; - op0 = x; - op1 = CONST0_RTX (mode); - } - break; - - case V8QImode: - case V4HImode: - /* Perform a parallel unsigned saturating subtraction. */ - x = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, x, - gen_rtx_US_MINUS (mode, op0, op1))); - - code = EQ; - op0 = x; - op1 = CONST0_RTX (mode); - negate = !negate; - break; - - default: - gcc_unreachable (); - } - } - - x = gen_rtx_fmt_ee (code, mode, op0, op1); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - - return negate; -} - -/* Emit an integral vector conditional move. 
*/ - -void -ia64_expand_vecint_cmov (rtx operands[]) -{ - enum machine_mode mode = GET_MODE (operands[0]); - enum rtx_code code = GET_CODE (operands[3]); - bool negate; - rtx cmp, x, ot, of; - - cmp = gen_reg_rtx (mode); - negate = ia64_expand_vecint_compare (code, mode, cmp, - operands[4], operands[5]); - - ot = operands[1+negate]; - of = operands[2-negate]; - - if (ot == CONST0_RTX (mode)) - { - if (of == CONST0_RTX (mode)) - { - emit_move_insn (operands[0], ot); - return; - } - - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, of); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); - } - else if (of == CONST0_RTX (mode)) - { - x = gen_rtx_AND (mode, cmp, ot); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); - } - else - { - rtx t, f; - - t = gen_reg_rtx (mode); - x = gen_rtx_AND (mode, cmp, operands[1+negate]); - emit_insn (gen_rtx_SET (VOIDmode, t, x)); - - f = gen_reg_rtx (mode); - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, operands[2-negate]); - emit_insn (gen_rtx_SET (VOIDmode, f, x)); - - x = gen_rtx_IOR (mode, t, f); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); - } -} - -/* Emit an integral vector min or max operation. Return true if all done. */ - -bool -ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode, - rtx operands[]) -{ - rtx xops[6]; - - /* These four combinations are supported directly. */ - if (mode == V8QImode && (code == UMIN || code == UMAX)) - return false; - if (mode == V4HImode && (code == SMIN || code == SMAX)) - return false; - - /* This combination can be implemented with only saturating subtraction. */ - if (mode == V4HImode && code == UMAX) - { - rtx x, tmp = gen_reg_rtx (mode); - - x = gen_rtx_US_MINUS (mode, operands[1], operands[2]); - emit_insn (gen_rtx_SET (VOIDmode, tmp, x)); - - emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2])); - return true; - } - - /* Everything else implemented via vector comparisons. */ - xops[0] = operands[0]; - xops[4] = xops[1] = operands[1]; - xops[5] = xops[2] = operands[2]; - - switch (code) - { - case UMIN: - code = LTU; - break; - case UMAX: - code = GTU; - break; - case SMIN: - code = LT; - break; - case SMAX: - code = GT; - break; - default: - gcc_unreachable (); - } - xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); - - ia64_expand_vecint_cmov (xops); - return true; -} - -/* Emit an integral vector widening sum operation. */ - -void -ia64_expand_widen_sum (rtx operands[3], bool unsignedp) -{ - rtx l, h, x, s; - enum machine_mode wmode, mode; - rtx (*unpack_l) (rtx, rtx, rtx); - rtx (*unpack_h) (rtx, rtx, rtx); - rtx (*plus) (rtx, rtx, rtx); - - wmode = GET_MODE (operands[0]); - mode = GET_MODE (operands[1]); - - switch (mode) - { - case V8QImode: - unpack_l = gen_unpack1_l; - unpack_h = gen_unpack1_h; - plus = gen_addv4hi3; - break; - case V4HImode: - unpack_l = gen_unpack2_l; - unpack_h = gen_unpack2_h; - plus = gen_addv2si3; - break; - default: - gcc_unreachable (); - } - - /* Fill in x with the sign extension of each element in op1. 
*/ - if (unsignedp) - x = CONST0_RTX (mode); - else - { - bool neg; - - x = gen_reg_rtx (mode); - - neg = ia64_expand_vecint_compare (LT, mode, x, operands[1], - CONST0_RTX (mode)); - gcc_assert (!neg); - } - - l = gen_reg_rtx (wmode); - h = gen_reg_rtx (wmode); - s = gen_reg_rtx (wmode); - - emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x)); - emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x)); - emit_insn (plus (s, l, operands[2])); - emit_insn (plus (operands[0], h, s)); -} - -/* Emit a signed or unsigned V8QI dot product operation. */ - -void -ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp) -{ - rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3; - - /* Fill in x1 and x2 with the sign extension of each element. */ - if (unsignedp) - x1 = x2 = CONST0_RTX (V8QImode); - else - { - bool neg; - - x1 = gen_reg_rtx (V8QImode); - x2 = gen_reg_rtx (V8QImode); - - neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1], - CONST0_RTX (V8QImode)); - gcc_assert (!neg); - neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2], - CONST0_RTX (V8QImode)); - gcc_assert (!neg); - } - - l1 = gen_reg_rtx (V4HImode); - l2 = gen_reg_rtx (V4HImode); - h1 = gen_reg_rtx (V4HImode); - h2 = gen_reg_rtx (V4HImode); - - emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1)); - emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2)); - emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1)); - emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2)); - - p1 = gen_reg_rtx (V2SImode); - p2 = gen_reg_rtx (V2SImode); - p3 = gen_reg_rtx (V2SImode); - p4 = gen_reg_rtx (V2SImode); - emit_insn (gen_pmpy2_r (p1, l1, l2)); - emit_insn (gen_pmpy2_l (p2, l1, l2)); - emit_insn (gen_pmpy2_r (p3, h1, h2)); - emit_insn (gen_pmpy2_l (p4, h1, h2)); - - s1 = gen_reg_rtx (V2SImode); - s2 = gen_reg_rtx (V2SImode); - s3 = gen_reg_rtx (V2SImode); - emit_insn (gen_addv2si3 (s1, p1, p2)); - emit_insn (gen_addv2si3 (s2, p3, p4)); - emit_insn (gen_addv2si3 (s3, s1, operands[3])); - emit_insn (gen_addv2si3 (operands[0], s2, s3)); -} - -/* Emit the appropriate sequence for a call. */ - -void -ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED, - int sibcall_p) -{ - rtx insn, b0; - - addr = XEXP (addr, 0); - addr = convert_memory_address (DImode, addr); - b0 = gen_rtx_REG (DImode, R_BR (0)); - - /* ??? Should do this for functions known to bind local too. */ - if (TARGET_NO_PIC || TARGET_AUTO_PIC) - { - if (sibcall_p) - insn = gen_sibcall_nogp (addr); - else if (! retval) - insn = gen_call_nogp (addr, b0); - else - insn = gen_call_value_nogp (retval, addr, b0); - insn = emit_call_insn (insn); - } - else - { - if (sibcall_p) - insn = gen_sibcall_gp (addr); - else if (! 
retval) - insn = gen_call_gp (addr, b0); - else - insn = gen_call_value_gp (retval, addr, b0); - insn = emit_call_insn (insn); - - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); - } - - if (sibcall_p) - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0); -} - -void -ia64_reload_gp (void) -{ - rtx tmp; - - if (current_frame_info.reg_save_gp) - tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp); - else - { - HOST_WIDE_INT offset; - - offset = (current_frame_info.spill_cfa_off - + current_frame_info.spill_size); - if (frame_pointer_needed) - { - tmp = hard_frame_pointer_rtx; - offset = -offset; - } - else - { - tmp = stack_pointer_rtx; - offset = current_frame_info.total_size - offset; - } - - if (CONST_OK_FOR_I (offset)) - emit_insn (gen_adddi3 (pic_offset_table_rtx, - tmp, GEN_INT (offset))); - else - { - emit_move_insn (pic_offset_table_rtx, GEN_INT (offset)); - emit_insn (gen_adddi3 (pic_offset_table_rtx, - pic_offset_table_rtx, tmp)); - } - - tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx); - } - - emit_move_insn (pic_offset_table_rtx, tmp); -} - -void -ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r, - rtx scratch_b, int noreturn_p, int sibcall_p) -{ - rtx insn; - bool is_desc = false; - - /* If we find we're calling through a register, then we're actually - calling through a descriptor, so load up the values. */ - if (REG_P (addr) && GR_REGNO_P (REGNO (addr))) - { - rtx tmp; - bool addr_dead_p; - - /* ??? We are currently constrained to *not* use peep2, because - we can legitimately change the global lifetime of the GP - (in the form of killing where previously live). This is - because a call through a descriptor doesn't use the previous - value of the GP, while a direct call does, and we do not - commit to either form until the split here. - - That said, this means that we lack precise life info for - whether ADDR is dead after this call. This is not terribly - important, since we can fix things up essentially for free - with the POST_DEC below, but it's nice to not use it when we - can immediately tell it's not necessary. */ - addr_dead_p = ((noreturn_p || sibcall_p - || TEST_HARD_REG_BIT (regs_invalidated_by_call, - REGNO (addr))) - && !FUNCTION_ARG_REGNO_P (REGNO (addr))); - - /* Load the code address into scratch_b. */ - tmp = gen_rtx_POST_INC (Pmode, addr); - tmp = gen_rtx_MEM (Pmode, tmp); - emit_move_insn (scratch_r, tmp); - emit_move_insn (scratch_b, scratch_r); - - /* Load the GP address. If ADDR is not dead here, then we must - revert the change made above via the POST_INCREMENT. */ - if (!addr_dead_p) - tmp = gen_rtx_POST_DEC (Pmode, addr); - else - tmp = addr; - tmp = gen_rtx_MEM (Pmode, tmp); - emit_move_insn (pic_offset_table_rtx, tmp); - - is_desc = true; - addr = scratch_b; - } - - if (sibcall_p) - insn = gen_sibcall_nogp (addr); - else if (retval) - insn = gen_call_value_nogp (retval, addr, retaddr); - else - insn = gen_call_nogp (addr, retaddr); - emit_call_insn (insn); - - if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) - ia64_reload_gp (); -} - -/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically. - - This differs from the generic code in that we know about the zero-extending - properties of cmpxchg, and the zero-extending requirements of ar.ccv. We - also know that ld.acq+cmpxchg.rel equals a full barrier. 
- - The loop we want to generate looks like - - cmp_reg = mem; - label: - old_reg = cmp_reg; - new_reg = cmp_reg op val; - cmp_reg = compare-and-swap(mem, old_reg, new_reg) - if (cmp_reg != old_reg) - goto label; - - Note that we only do the plain load from memory once. Subsequent - iterations use the value loaded by the compare-and-swap pattern. */ - -void -ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, - rtx old_dst, rtx new_dst) -{ - enum machine_mode mode = GET_MODE (mem); - rtx old_reg, new_reg, cmp_reg, ar_ccv, label; - enum insn_code icode; - - /* Special case for using fetchadd. */ - if ((mode == SImode || mode == DImode) - && (code == PLUS || code == MINUS) - && fetchadd_operand (val, mode)) - { - if (code == MINUS) - val = GEN_INT (-INTVAL (val)); - - if (!old_dst) - old_dst = gen_reg_rtx (mode); - - emit_insn (gen_memory_barrier ()); - - if (mode == SImode) - icode = CODE_FOR_fetchadd_acq_si; - else - icode = CODE_FOR_fetchadd_acq_di; - emit_insn (GEN_FCN (icode) (old_dst, mem, val)); - - if (new_dst) - { - new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst, - true, OPTAB_WIDEN); - if (new_reg != new_dst) - emit_move_insn (new_dst, new_reg); - } - return; - } - - /* Because of the volatile mem read, we get an ld.acq, which is the - front half of the full barrier. The end half is the cmpxchg.rel. */ - gcc_assert (MEM_VOLATILE_P (mem)); - - old_reg = gen_reg_rtx (DImode); - cmp_reg = gen_reg_rtx (DImode); - label = gen_label_rtx (); - - if (mode != DImode) - { - val = simplify_gen_subreg (DImode, val, mode, 0); - emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1)); - } - else - emit_move_insn (cmp_reg, mem); - - emit_label (label); - - ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); - emit_move_insn (old_reg, cmp_reg); - emit_move_insn (ar_ccv, cmp_reg); - - if (old_dst) - emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg)); - - new_reg = cmp_reg; - if (code == NOT) - { - new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true); - code = AND; - } - new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX, - true, OPTAB_DIRECT); - - if (mode != DImode) - new_reg = gen_lowpart (mode, new_reg); - if (new_dst) - emit_move_insn (new_dst, new_reg); - - switch (mode) - { - case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; - case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break; - case SImode: icode = CODE_FOR_cmpxchg_rel_si; break; - case DImode: icode = CODE_FOR_cmpxchg_rel_di; break; - default: - gcc_unreachable (); - } - - emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg)); - - emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label); -} - -/* Begin the assembly file. */ - -static void -ia64_file_start (void) -{ - /* Variable tracking should be run after all optimizations which change order - of insns. It also needs a valid CFG. This can't be done in - ia64_override_options, because flag_var_tracking is finalized after - that. */ - ia64_flag_var_tracking = flag_var_tracking; - flag_var_tracking = 0; - - default_file_start (); - emit_safe_across_calls (); -} - -void -emit_safe_across_calls (void) -{ - unsigned int rs, re; - int out_state; - - rs = 1; - out_state = 0; - while (1) - { - while (rs < 64 && call_used_regs[PR_REG (rs)]) - rs++; - if (rs >= 64) - break; - for (re = rs + 1; re < 64 && ! 
call_used_regs[PR_REG (re)]; re++) - continue; - if (out_state == 0) - { - fputs ("\t.pred.safe_across_calls ", asm_out_file); - out_state = 1; - } - else - fputc (',', asm_out_file); - if (re == rs + 1) - fprintf (asm_out_file, "p%u", rs); - else - fprintf (asm_out_file, "p%u-p%u", rs, re - 1); - rs = re + 1; - } - if (out_state) - fputc ('\n', asm_out_file); -} - -/* Helper function for ia64_compute_frame_size: find an appropriate general - register to spill some special register to. SPECIAL_SPILL_MASK contains - bits in GR0 to GR31 that have already been allocated by this routine. - TRY_LOCALS is true if we should attempt to locate a local regnum. */ - -static int -find_gr_spill (int try_locals) -{ - int regno; - - /* If this is a leaf function, first try an otherwise unused - call-clobbered register. */ - if (current_function_is_leaf) - { - for (regno = GR_REG (1); regno <= GR_REG (31); regno++) - if (! regs_ever_live[regno] - && call_used_regs[regno] - && ! fixed_regs[regno] - && ! global_regs[regno] - && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) - { - current_frame_info.gr_used_mask |= 1 << regno; - return regno; - } - } - - if (try_locals) - { - regno = current_frame_info.n_local_regs; - /* If there is a frame pointer, then we can't use loc79, because - that is HARD_FRAME_POINTER_REGNUM. In particular, see the - reg_name switching code in ia64_expand_prologue. */ - if (regno < (80 - frame_pointer_needed)) - { - current_frame_info.n_local_regs = regno + 1; - return LOC_REG (0) + regno; - } - } - - /* Failed to find a general register to spill to. Must use stack. */ - return 0; -} - -/* In order to make for nice schedules, we try to allocate every temporary - to a different register. We must of course stay away from call-saved, - fixed, and global registers. We must also stay away from registers - allocated in current_frame_info.gr_used_mask, since those include regs - used all through the prologue. - - Any register allocated here must be used immediately. The idea is to - aid scheduling, not to solve data flow problems. */ - -static int last_scratch_gr_reg; - -static int -next_scratch_gr_reg (void) -{ - int i, regno; - - for (i = 0; i < 32; ++i) - { - regno = (last_scratch_gr_reg + i + 1) & 31; - if (call_used_regs[regno] - && ! fixed_regs[regno] - && ! global_regs[regno] - && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) - { - last_scratch_gr_reg = regno; - return regno; - } - } - - /* There must be _something_ available. */ - gcc_unreachable (); -} - -/* Helper function for ia64_compute_frame_size, called through - diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */ - -static void -mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED) -{ - unsigned int regno = REGNO (reg); - if (regno < 32) - { - unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)]; - for (i = 0; i < n; ++i) - current_frame_info.gr_used_mask |= 1 << (regno + i); - } -} - -/* Returns the number of bytes offset between the frame pointer and the stack - pointer for the current function. SIZE is the number of bytes of space - needed for local variables. 
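   The results are stored in current_frame_info rather than returned;
   ia64_initial_elimination_offset derives the individual elimination
   offsets from them.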
*/ - -static void -ia64_compute_frame_size (HOST_WIDE_INT size) -{ - HOST_WIDE_INT total_size; - HOST_WIDE_INT spill_size = 0; - HOST_WIDE_INT extra_spill_size = 0; - HOST_WIDE_INT pretend_args_size; - HARD_REG_SET mask; - int n_spilled = 0; - int spilled_gr_p = 0; - int spilled_fr_p = 0; - unsigned int regno; - int i; - - if (current_frame_info.initialized) - return; - - memset (&current_frame_info, 0, sizeof current_frame_info); - CLEAR_HARD_REG_SET (mask); - - /* Don't allocate scratches to the return register. */ - diddle_return_value (mark_reg_gr_used_mask, NULL); - - /* Don't allocate scratches to the EH scratch registers. */ - if (cfun->machine->ia64_eh_epilogue_sp) - mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL); - if (cfun->machine->ia64_eh_epilogue_bsp) - mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); - - /* Find the size of the register stack frame. We have only 80 local - registers, because we reserve 8 for the inputs and 8 for the - outputs. */ - - /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed, - since we'll be adjusting that down later. */ - regno = LOC_REG (78) + ! frame_pointer_needed; - for (; regno >= LOC_REG (0); regno--) - if (regs_ever_live[regno]) - break; - current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; - - /* For functions marked with the syscall_linkage attribute, we must mark - all eight input registers as in use, so that locals aren't visible to - the caller. */ - - if (cfun->machine->n_varargs > 0 - || lookup_attribute ("syscall_linkage", - TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) - current_frame_info.n_input_regs = 8; - else - { - for (regno = IN_REG (7); regno >= IN_REG (0); regno--) - if (regs_ever_live[regno]) - break; - current_frame_info.n_input_regs = regno - IN_REG (0) + 1; - } - - for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--) - if (regs_ever_live[regno]) - break; - i = regno - OUT_REG (0) + 1; - -#ifndef PROFILE_HOOK - /* When -p profiling, we need one output register for the mcount argument. - Likewise for -a profiling for the bb_init_func argument. For -ax - profiling, we need two output registers for the two bb_init_trace_func - arguments. */ - if (current_function_profile) - i = MAX (i, 1); -#endif - current_frame_info.n_output_regs = i; - - /* ??? No rotating register support yet. */ - current_frame_info.n_rotate_regs = 0; - - /* Discover which registers need spilling, and how much room that - will take. Begin with floating point and general registers, - which will always wind up on the stack. */ - - for (regno = FR_REG (2); regno <= FR_REG (127); regno++) - if (regs_ever_live[regno] && ! call_used_regs[regno]) - { - SET_HARD_REG_BIT (mask, regno); - spill_size += 16; - n_spilled += 1; - spilled_fr_p = 1; - } - - for (regno = GR_REG (1); regno <= GR_REG (31); regno++) - if (regs_ever_live[regno] && ! call_used_regs[regno]) - { - SET_HARD_REG_BIT (mask, regno); - spill_size += 8; - n_spilled += 1; - spilled_gr_p = 1; - } - - for (regno = BR_REG (1); regno <= BR_REG (7); regno++) - if (regs_ever_live[regno] && ! call_used_regs[regno]) - { - SET_HARD_REG_BIT (mask, regno); - spill_size += 8; - n_spilled += 1; - } - - /* Now come all special registers that might get saved in other - general registers. */ - - if (frame_pointer_needed) - { - current_frame_info.reg_fp = find_gr_spill (1); - /* If we did not get a register, then we take LOC79. This is guaranteed - to be free, even if regs_ever_live is already set, because this is - HARD_FRAME_POINTER_REGNUM.
This requires incrementing n_local_regs, - as we don't count loc79 above. */ - if (current_frame_info.reg_fp == 0) - { - current_frame_info.reg_fp = LOC_REG (79); - current_frame_info.n_local_regs++; - } - } - - if (! current_function_is_leaf) - { - /* Emit a save of BR0 if we call other functions. Do this even - if this function doesn't return, as EH depends on this to be - able to unwind the stack. */ - SET_HARD_REG_BIT (mask, BR_REG (0)); - - current_frame_info.reg_save_b0 = find_gr_spill (1); - if (current_frame_info.reg_save_b0 == 0) - { - extra_spill_size += 8; - n_spilled += 1; - } - - /* Similarly for ar.pfs. */ - SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); - current_frame_info.reg_save_ar_pfs = find_gr_spill (1); - if (current_frame_info.reg_save_ar_pfs == 0) - { - extra_spill_size += 8; - n_spilled += 1; - } - - /* Similarly for gp. Note that if we're calling setjmp, the stacked - registers are clobbered, so we fall back to the stack. */ - current_frame_info.reg_save_gp - = (current_function_calls_setjmp ? 0 : find_gr_spill (1)); - if (current_frame_info.reg_save_gp == 0) - { - SET_HARD_REG_BIT (mask, GR_REG (1)); - spill_size += 8; - n_spilled += 1; - } - } - else - { - if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)]) - { - SET_HARD_REG_BIT (mask, BR_REG (0)); - extra_spill_size += 8; - n_spilled += 1; - } - - if (regs_ever_live[AR_PFS_REGNUM]) - { - SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); - current_frame_info.reg_save_ar_pfs = find_gr_spill (1); - if (current_frame_info.reg_save_ar_pfs == 0) - { - extra_spill_size += 8; - n_spilled += 1; - } - } - } - - /* Unwind descriptor hackery: things are most efficient if we allocate - consecutive GR save registers for RP, PFS, FP in that order. However, - it is absolutely critical that FP get the only hard register that's - guaranteed to be free, so we allocated it first. If all three did - happen to be allocated hard regs, and are consecutive, rearrange them - into the preferred order now. */ - if (current_frame_info.reg_fp != 0 - && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1 - && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2) - { - current_frame_info.reg_save_b0 = current_frame_info.reg_fp; - current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1; - current_frame_info.reg_fp = current_frame_info.reg_fp + 2; - } - - /* See if we need to store the predicate register block. */ - for (regno = PR_REG (0); regno <= PR_REG (63); regno++) - if (regs_ever_live[regno] && ! call_used_regs[regno]) - break; - if (regno <= PR_REG (63)) - { - SET_HARD_REG_BIT (mask, PR_REG (0)); - current_frame_info.reg_save_pr = find_gr_spill (1); - if (current_frame_info.reg_save_pr == 0) - { - extra_spill_size += 8; - n_spilled += 1; - } - - /* ??? Mark them all as used so that register renaming and such - are free to use them. */ - for (regno = PR_REG (0); regno <= PR_REG (63); regno++) - regs_ever_live[regno] = 1; - } - - /* If we're forced to use st8.spill, we're forced to save and restore - ar.unat as well. The check for existing liveness allows inline asm - to touch ar.unat. 
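   (st8.spill stores a general register and deposits that register's NaT
   bit in ar.unat, and ld8.fill later reads the bit back, which is why
   spilling any GR drags ar.unat into the save/restore protocol.)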
*/ - if (spilled_gr_p || cfun->machine->n_varargs - || regs_ever_live[AR_UNAT_REGNUM]) - { - regs_ever_live[AR_UNAT_REGNUM] = 1; - SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); - current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0); - if (current_frame_info.reg_save_ar_unat == 0) - { - extra_spill_size += 8; - n_spilled += 1; - } - } - - if (regs_ever_live[AR_LC_REGNUM]) - { - SET_HARD_REG_BIT (mask, AR_LC_REGNUM); - current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0); - if (current_frame_info.reg_save_ar_lc == 0) - { - extra_spill_size += 8; - n_spilled += 1; - } - } - - /* If we have an odd number of words of pretend arguments written to - the stack, then the FR save area will be unaligned. We round the - size of this area up to keep things 16 byte aligned. */ - if (spilled_fr_p) - pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size); - else - pretend_args_size = current_function_pretend_args_size; - - total_size = (spill_size + extra_spill_size + size + pretend_args_size - + current_function_outgoing_args_size); - total_size = IA64_STACK_ALIGN (total_size); - - /* We always use the 16-byte scratch area provided by the caller, but - if we are a leaf function, there's no one to which we need to provide - a scratch area. */ - if (current_function_is_leaf) - total_size = MAX (0, total_size - 16); - - current_frame_info.total_size = total_size; - current_frame_info.spill_cfa_off = pretend_args_size - 16; - current_frame_info.spill_size = spill_size; - current_frame_info.extra_spill_size = extra_spill_size; - COPY_HARD_REG_SET (current_frame_info.mask, mask); - current_frame_info.n_spilled = n_spilled; - current_frame_info.initialized = reload_completed; -} - -/* Compute the initial difference between the specified pair of registers. */ - -HOST_WIDE_INT -ia64_initial_elimination_offset (int from, int to) -{ - HOST_WIDE_INT offset; - - ia64_compute_frame_size (get_frame_size ()); - switch (from) - { - case FRAME_POINTER_REGNUM: - switch (to) - { - case HARD_FRAME_POINTER_REGNUM: - if (current_function_is_leaf) - offset = -current_frame_info.total_size; - else - offset = -(current_frame_info.total_size - - current_function_outgoing_args_size - 16); - break; - - case STACK_POINTER_REGNUM: - if (current_function_is_leaf) - offset = 0; - else - offset = 16 + current_function_outgoing_args_size; - break; - - default: - gcc_unreachable (); - } - break; - - case ARG_POINTER_REGNUM: - /* Arguments start above the 16 byte save area, unless stdarg - in which case we store through the 16 byte save area. */ - switch (to) - { - case HARD_FRAME_POINTER_REGNUM: - offset = 16 - current_function_pretend_args_size; - break; - - case STACK_POINTER_REGNUM: - offset = (current_frame_info.total_size - + 16 - current_function_pretend_args_size); - break; - - default: - gcc_unreachable (); - } - break; - - default: - gcc_unreachable (); - } - - return offset; -} - -/* If there are more than a trivial number of register spills, we use - two interleaved iterators so that we can get two memory references - per insn group. - - In order to simplify things in the prologue and epilogue expanders, - we use helper functions to fix up the memory references after the - fact with the appropriate offsets to a POST_MODIFY memory mode. - The following data structure tracks the state of the two iterators - while insns are being emitted. 
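   For illustration only (register numbers and offsets invented), a run
   of eight-byte spills then comes out as

	st8.spill [r2] = r35, 16
	st8.spill [r3] = r36, 16
	st8.spill [r2] = r37, 16
	st8.spill [r3] = r38, 16

   with the two base registers alternating over interleaved slots, so
   each iterator post-increments by 16 and every insn group can contain
   two memory operations.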
*/ - -struct spill_fill_data -{ - rtx init_after; /* point at which to emit initializations */ - rtx init_reg[2]; /* initial base register */ - rtx iter_reg[2]; /* the iterator registers */ - rtx *prev_addr[2]; /* address of last memory use */ - rtx prev_insn[2]; /* the insn corresponding to prev_addr */ - HOST_WIDE_INT prev_off[2]; /* last offset */ - int n_iter; /* number of iterators in use */ - int next_iter; /* next iterator to use */ - unsigned int save_gr_used_mask; -}; - -static struct spill_fill_data spill_fill_data; - -static void -setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off) -{ - int i; - - spill_fill_data.init_after = get_last_insn (); - spill_fill_data.init_reg[0] = init_reg; - spill_fill_data.init_reg[1] = init_reg; - spill_fill_data.prev_addr[0] = NULL; - spill_fill_data.prev_addr[1] = NULL; - spill_fill_data.prev_insn[0] = NULL; - spill_fill_data.prev_insn[1] = NULL; - spill_fill_data.prev_off[0] = cfa_off; - spill_fill_data.prev_off[1] = cfa_off; - spill_fill_data.next_iter = 0; - spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; - - spill_fill_data.n_iter = 1 + (n_spills > 2); - for (i = 0; i < spill_fill_data.n_iter; ++i) - { - int regno = next_scratch_gr_reg (); - spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); - current_frame_info.gr_used_mask |= 1 << regno; - } -} - -static void -finish_spill_pointers (void) -{ - current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; -} - -static rtx -spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off) -{ - int iter = spill_fill_data.next_iter; - HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; - rtx disp_rtx = GEN_INT (disp); - rtx mem; - - if (spill_fill_data.prev_addr[iter]) - { - if (CONST_OK_FOR_N (disp)) - { - *spill_fill_data.prev_addr[iter] - = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], - gen_rtx_PLUS (DImode, - spill_fill_data.iter_reg[iter], - disp_rtx)); - REG_NOTES (spill_fill_data.prev_insn[iter]) - = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter], - REG_NOTES (spill_fill_data.prev_insn[iter])); - } - else - { - /* ??? Could use register post_modify for loads. */ - if (! CONST_OK_FOR_I (disp)) - { - rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); - emit_move_insn (tmp, disp_rtx); - disp_rtx = tmp; - } - emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], - spill_fill_data.iter_reg[iter], disp_rtx)); - } - } - /* Micro-optimization: if we've created a frame pointer, it's at - CFA 0, which may allow the real iterator to be initialized lower, - slightly increasing parallelism. Also, if there are few saves - it may eliminate the iterator entirely. */ - else if (disp == 0 - && spill_fill_data.init_reg[iter] == stack_pointer_rtx - && frame_pointer_needed) - { - mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); - set_mem_alias_set (mem, get_varargs_alias_set ()); - return mem; - } - else - { - rtx seq, insn; - - if (disp == 0) - seq = gen_movdi (spill_fill_data.iter_reg[iter], - spill_fill_data.init_reg[iter]); - else - { - start_sequence (); - - if (! CONST_OK_FOR_I (disp)) - { - rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); - emit_move_insn (tmp, disp_rtx); - disp_rtx = tmp; - } - - emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], - spill_fill_data.init_reg[iter], - disp_rtx)); - - seq = get_insns (); - end_sequence (); - } - - /* Careful for being the first insn in a sequence. 
*/ - if (spill_fill_data.init_after) - insn = emit_insn_after (seq, spill_fill_data.init_after); - else - { - rtx first = get_insns (); - if (first) - insn = emit_insn_before (seq, first); - else - insn = emit_insn (seq); - } - spill_fill_data.init_after = insn; - - /* If DISP is 0, we may or may not have a further adjustment - afterward. If we do, then the load/store insn may be modified - to be a post-modify. If we don't, then this copy may be - eliminated by copyprop_hardreg_forward, which makes this - insn garbage, which runs afoul of the sanity check in - propagate_one_insn. So mark this insn as legal to delete. */ - if (disp == 0) - REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, - REG_NOTES (insn)); - } - - mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); - - /* ??? Not all of the spills are for varargs, but some of them are. - The rest of the spills belong in an alias set of their own. But - it doesn't actually hurt to include them here. */ - set_mem_alias_set (mem, get_varargs_alias_set ()); - - spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); - spill_fill_data.prev_off[iter] = cfa_off; - - if (++iter >= spill_fill_data.n_iter) - iter = 0; - spill_fill_data.next_iter = iter; - - return mem; -} - -static void -do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off, - rtx frame_reg) -{ - int iter = spill_fill_data.next_iter; - rtx mem, insn; - - mem = spill_restore_mem (reg, cfa_off); - insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); - spill_fill_data.prev_insn[iter] = insn; - - if (frame_reg) - { - rtx base; - HOST_WIDE_INT off; - - RTX_FRAME_RELATED_P (insn) = 1; - - /* Don't even pretend that the unwind code can intuit its way - through a pair of interleaved post_modify iterators. Just - provide the correct answer. */ - - if (frame_pointer_needed) - { - base = hard_frame_pointer_rtx; - off = - cfa_off; - } - else - { - base = stack_pointer_rtx; - off = current_frame_info.total_size - cfa_off; - } - - REG_NOTES (insn) - = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, - gen_rtx_MEM (GET_MODE (reg), - plus_constant (base, off)), - frame_reg), - REG_NOTES (insn)); - } -} - -static void -do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off) -{ - int iter = spill_fill_data.next_iter; - rtx insn; - - insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), - GEN_INT (cfa_off))); - spill_fill_data.prev_insn[iter] = insn; -} - -/* Wrapper functions that discard the CONST_INT spill offset. These - exist so that we can give gr_spill/gr_fill the offset they need and - use a consistent function interface. */ - -static rtx -gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) -{ - return gen_movdi (dest, src); -} - -static rtx -gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) -{ - return gen_fr_spill (dest, src); -} - -static rtx -gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) -{ - return gen_fr_restore (dest, src); -} - -/* Called after register allocation to add any instructions needed for the - prologue. Using a prologue insn is favored compared to putting all of the - instructions in output_function_prologue(), since it allows the scheduler - to intermix instructions with the saves of the caller saved registers. In - some cases, it might be necessary to emit a barrier instruction as the last - insn to prevent such scheduling.
- - Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 - so that the debug info generation code can handle them properly. - - The register save area is laid out like so: - cfa+16 - [ varargs spill area ] - [ fr register spill area ] - [ br register spill area ] - [ ar register spill area ] - [ pr register spill area ] - [ gr register spill area ] */ - -/* ??? Get inefficient code when the frame size is larger than can fit in an - adds instruction. */ - -void -ia64_expand_prologue (void) -{ - rtx insn, ar_pfs_save_reg, ar_unat_save_reg; - int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; - rtx reg, alt_reg; - - ia64_compute_frame_size (get_frame_size ()); - last_scratch_gr_reg = 15; - - /* If there is no epilogue, then we don't need some prologue insns. - We need to avoid emitting the dead prologue insns, because flow - will complain about them. */ - if (optimize) - { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) - if ((e->flags & EDGE_FAKE) == 0 - && (e->flags & EDGE_FALLTHRU) != 0) - break; - epilogue_p = (e != NULL); - } - else - epilogue_p = 1; - - /* Set the local, input, and output register names. We need to do this - for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in - half. If we use in/loc/out register names, then we get assembler errors - in crtn.S because there is no alloc insn or regstk directive in there. */ - if (! TARGET_REG_NAMES) - { - int inputs = current_frame_info.n_input_regs; - int locals = current_frame_info.n_local_regs; - int outputs = current_frame_info.n_output_regs; - - for (i = 0; i < inputs; i++) - reg_names[IN_REG (i)] = ia64_reg_numbers[i]; - for (i = 0; i < locals; i++) - reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; - for (i = 0; i < outputs; i++) - reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; - } - - /* Set the frame pointer register name. The regnum is logically loc79, - but of course we'll not have allocated that many locals. Rather than - worrying about renumbering the existing rtxs, we adjust the name. */ - /* ??? This code means that we can never use one local register when - there is a frame pointer. loc79 gets wasted in this case, as it is - renamed to a register that will never be used. See also the try_locals - code in find_gr_spill. */ - if (current_frame_info.reg_fp) - { - const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; - reg_names[HARD_FRAME_POINTER_REGNUM] - = reg_names[current_frame_info.reg_fp]; - reg_names[current_frame_info.reg_fp] = tmp; - } - - /* We don't need an alloc instruction if we've used no outputs or locals. */ - if (current_frame_info.n_local_regs == 0 - && current_frame_info.n_output_regs == 0 - && current_frame_info.n_input_regs <= current_function_args_info.int_regs - && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) - { - /* If there is no alloc, but there are input registers used, then we - need a .regstk directive.
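   For example (purely illustrative), a function that reads two incoming
   arguments but emits no alloc would get

	.regstk 2, 0, 0, 0

   telling the assembler how the register stack frame divides into
   input, local, output and rotating portions.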
*/ - current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); - ar_pfs_save_reg = NULL_RTX; - } - else - { - current_frame_info.need_regstk = 0; - - if (current_frame_info.reg_save_ar_pfs) - regno = current_frame_info.reg_save_ar_pfs; - else - regno = next_scratch_gr_reg (); - ar_pfs_save_reg = gen_rtx_REG (DImode, regno); - - insn = emit_insn (gen_alloc (ar_pfs_save_reg, - GEN_INT (current_frame_info.n_input_regs), - GEN_INT (current_frame_info.n_local_regs), - GEN_INT (current_frame_info.n_output_regs), - GEN_INT (current_frame_info.n_rotate_regs))); - RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0); - } - - /* Set up frame pointer, stack pointer, and spill iterators. */ - - n_varargs = cfun->machine->n_varargs; - setup_spill_pointers (current_frame_info.n_spilled + n_varargs, - stack_pointer_rtx, 0); - - if (frame_pointer_needed) - { - insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); - RTX_FRAME_RELATED_P (insn) = 1; - } - - if (current_frame_info.total_size != 0) - { - rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); - rtx offset; - - if (CONST_OK_FOR_I (- current_frame_info.total_size)) - offset = frame_size_rtx; - else - { - regno = next_scratch_gr_reg (); - offset = gen_rtx_REG (DImode, regno); - emit_move_insn (offset, frame_size_rtx); - } - - insn = emit_insn (gen_adddi3 (stack_pointer_rtx, - stack_pointer_rtx, offset)); - - if (! frame_pointer_needed) - { - RTX_FRAME_RELATED_P (insn) = 1; - if (GET_CODE (offset) != CONST_INT) - { - REG_NOTES (insn) - = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - gen_rtx_PLUS (DImode, - stack_pointer_rtx, - frame_size_rtx)), - REG_NOTES (insn)); - } - } - - /* ??? At this point we must generate a magic insn that appears to - modify the stack pointer, the frame pointer, and all spill - iterators. This would allow the most scheduling freedom. For - now, just hard stop. */ - emit_insn (gen_blockage ()); - } - - /* Must copy out ar.unat before doing any integer spills. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) - { - if (current_frame_info.reg_save_ar_unat) - ar_unat_save_reg - = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); - else - { - alt_regno = next_scratch_gr_reg (); - ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); - current_frame_info.gr_used_mask |= 1 << alt_regno; - } - - reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); - insn = emit_move_insn (ar_unat_save_reg, reg); - RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0); - - /* Even if we're not going to generate an epilogue, we still - need to save the register so that EH works. */ - if (! epilogue_p && current_frame_info.reg_save_ar_unat) - emit_insn (gen_prologue_use (ar_unat_save_reg)); - } - else - ar_unat_save_reg = NULL_RTX; - - /* Spill all varargs registers. Do this before spilling any GR registers, - since we want the UNAT bits for the GR registers to override the UNAT - bits from varargs, which we don't care about. */ - - cfa_off = -16; - for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) - { - reg = gen_rtx_REG (DImode, regno); - do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); - } - - /* Locate the bottom of the register save area. */ - cfa_off = (current_frame_info.spill_cfa_off - + current_frame_info.spill_size - + current_frame_info.extra_spill_size); - - /* Save the predicate register block either in a register or in memory. 
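   A single move to or from the PR block (mov rN = pr) transfers all 64
   predicate bits at once, so one DImode register or one 8-byte stack
   slot always suffices.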
*/ - if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) - { - reg = gen_rtx_REG (DImode, PR_REG (0)); - if (current_frame_info.reg_save_pr != 0) - { - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); - insn = emit_move_insn (alt_reg, reg); - - /* ??? Denote pr spill/fill by a DImode move that modifies all - 64 hard registers. */ - RTX_FRAME_RELATED_P (insn) = 1; - REG_NOTES (insn) - = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, alt_reg, reg), - REG_NOTES (insn)); - - /* Even if we're not going to generate an epilogue, we still - need to save the register so that EH works. */ - if (! epilogue_p) - emit_insn (gen_prologue_use (alt_reg)); - } - else - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - insn = emit_move_insn (alt_reg, reg); - do_spill (gen_movdi_x, alt_reg, cfa_off, reg); - cfa_off -= 8; - } - } - - /* Handle AR regs in numerical order. All of them get special handling. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) - && current_frame_info.reg_save_ar_unat == 0) - { - reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); - do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); - cfa_off -= 8; - } - - /* The alloc insn already copied ar.pfs into a general register. The - only thing we have to do now is copy that register to a stack slot - if we'd not allocated a local register for the job. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) - && current_frame_info.reg_save_ar_pfs == 0) - { - reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); - do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); - cfa_off -= 8; - } - - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) - { - reg = gen_rtx_REG (DImode, AR_LC_REGNUM); - if (current_frame_info.reg_save_ar_lc != 0) - { - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); - insn = emit_move_insn (alt_reg, reg); - RTX_FRAME_RELATED_P (insn) = 1; - - /* Even if we're not going to generate an epilogue, we still - need to save the register so that EH works. */ - if (! epilogue_p) - emit_insn (gen_prologue_use (alt_reg)); - } - else - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - emit_move_insn (alt_reg, reg); - do_spill (gen_movdi_x, alt_reg, cfa_off, reg); - cfa_off -= 8; - } - } - - /* Save the return pointer. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) - { - reg = gen_rtx_REG (DImode, BR_REG (0)); - if (current_frame_info.reg_save_b0 != 0) - { - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); - insn = emit_move_insn (alt_reg, reg); - RTX_FRAME_RELATED_P (insn) = 1; - - /* Even if we're not going to generate an epilogue, we still - need to save the register so that EH works. */ - if (! epilogue_p) - emit_insn (gen_prologue_use (alt_reg)); - } - else - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - emit_move_insn (alt_reg, reg); - do_spill (gen_movdi_x, alt_reg, cfa_off, reg); - cfa_off -= 8; - } - } - - if (current_frame_info.reg_save_gp) - { - insn = emit_move_insn (gen_rtx_REG (DImode, - current_frame_info.reg_save_gp), - pic_offset_table_rtx); - /* We don't know for sure yet if this is actually needed, since - we've not split the PIC call patterns. If all of the calls - are indirect, and not followed by any uses of the gp, then - this save is dead. Allow it to go away. 
*/ - REG_NOTES (insn) - = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn)); - } - - /* We should now be at the base of the gr/br/fr spill area. */ - gcc_assert (cfa_off == (current_frame_info.spill_cfa_off - + current_frame_info.spill_size)); - - /* Spill all general registers. */ - for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - { - reg = gen_rtx_REG (DImode, regno); - do_spill (gen_gr_spill, reg, cfa_off, reg); - cfa_off -= 8; - } - - /* Spill the rest of the BR registers. */ - for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - reg = gen_rtx_REG (DImode, regno); - emit_move_insn (alt_reg, reg); - do_spill (gen_movdi_x, alt_reg, cfa_off, reg); - cfa_off -= 8; - } - - /* Align the frame and spill all FR registers. */ - for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - { - gcc_assert (!(cfa_off & 15)); - reg = gen_rtx_REG (XFmode, regno); - do_spill (gen_fr_spill_x, reg, cfa_off, reg); - cfa_off -= 16; - } - - gcc_assert (cfa_off == current_frame_info.spill_cfa_off); - - finish_spill_pointers (); -} - -/* Called after register allocation to add any instructions needed for the - epilogue. Using an epilogue insn is favored compared to putting all of the - instructions in output_function_epilogue(), since it allows the scheduler - to intermix instructions with the restores of the caller saved registers. In - some cases, it might be necessary to emit a barrier instruction as the last - insn to prevent such scheduling. */ - -void -ia64_expand_epilogue (int sibcall_p) -{ - rtx insn, reg, alt_reg, ar_unat_save_reg; - int regno, alt_regno, cfa_off; - - ia64_compute_frame_size (get_frame_size ()); - - /* If there is a frame pointer, then we use it instead of the stack - pointer, so that the stack pointer does not need to be valid when - the epilogue starts. See EXIT_IGNORE_STACK. */ - if (frame_pointer_needed) - setup_spill_pointers (current_frame_info.n_spilled, - hard_frame_pointer_rtx, 0); - else - setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, - current_frame_info.total_size); - - if (current_frame_info.total_size != 0) - { - /* ??? At this point we must generate a magic insn that appears to - modify the spill iterators and the frame pointer. This would - allow the most scheduling freedom. For now, just hard stop. */ - emit_insn (gen_blockage ()); - } - - /* Locate the bottom of the register save area. */ - cfa_off = (current_frame_info.spill_cfa_off - + current_frame_info.spill_size - + current_frame_info.extra_spill_size); - - /* Restore the predicate registers. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) - { - if (current_frame_info.reg_save_pr != 0) - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); - else - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - do_restore (gen_movdi_x, alt_reg, cfa_off); - cfa_off -= 8; - } - reg = gen_rtx_REG (DImode, PR_REG (0)); - emit_move_insn (reg, alt_reg); - } - - /* Restore the application registers. */ - - /* Load the saved unat from the stack, but do not restore it until - after the GRs have been restored.
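   (The ld8.fill insns that restore the GRs consume ar.unat to
   reconstruct each register's NaT bit, so the value saved by the
   prologue can be moved back into ar.unat only after all of the fills
   are done.)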
*/ - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) - { - if (current_frame_info.reg_save_ar_unat != 0) - ar_unat_save_reg - = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); - else - { - alt_regno = next_scratch_gr_reg (); - ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); - current_frame_info.gr_used_mask |= 1 << alt_regno; - do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); - cfa_off -= 8; - } - } - else - ar_unat_save_reg = NULL_RTX; - - if (current_frame_info.reg_save_ar_pfs != 0) - { - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs); - reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); - emit_move_insn (reg, alt_reg); - } - else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - do_restore (gen_movdi_x, alt_reg, cfa_off); - cfa_off -= 8; - reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); - emit_move_insn (reg, alt_reg); - } - - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) - { - if (current_frame_info.reg_save_ar_lc != 0) - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); - else - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - do_restore (gen_movdi_x, alt_reg, cfa_off); - cfa_off -= 8; - } - reg = gen_rtx_REG (DImode, AR_LC_REGNUM); - emit_move_insn (reg, alt_reg); - } - - /* Restore the return pointer. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) - { - if (current_frame_info.reg_save_b0 != 0) - alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); - else - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - do_restore (gen_movdi_x, alt_reg, cfa_off); - cfa_off -= 8; - } - reg = gen_rtx_REG (DImode, BR_REG (0)); - emit_move_insn (reg, alt_reg); - } - - /* We should now be at the base of the gr/br/fr spill area. */ - gcc_assert (cfa_off == (current_frame_info.spill_cfa_off - + current_frame_info.spill_size)); - - /* The GP may be stored on the stack in the prologue, but it's - never restored in the epilogue. Skip the stack slot. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) - cfa_off -= 8; - - /* Restore all general registers. */ - for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - { - reg = gen_rtx_REG (DImode, regno); - do_restore (gen_gr_restore, reg, cfa_off); - cfa_off -= 8; - } - - /* Restore the branch registers. */ - for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - { - alt_regno = next_scratch_gr_reg (); - alt_reg = gen_rtx_REG (DImode, alt_regno); - do_restore (gen_movdi_x, alt_reg, cfa_off); - cfa_off -= 8; - reg = gen_rtx_REG (DImode, regno); - emit_move_insn (reg, alt_reg); - } - - /* Restore floating point registers. */ - for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - { - gcc_assert (!(cfa_off & 15)); - reg = gen_rtx_REG (XFmode, regno); - do_restore (gen_fr_restore_x, reg, cfa_off); - cfa_off -= 16; - } - - /* Restore ar.unat for real. */ - if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) - { - reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); - emit_move_insn (reg, ar_unat_save_reg); - } - - gcc_assert (cfa_off == current_frame_info.spill_cfa_off); - - finish_spill_pointers (); - - if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp) - { - /* ??? 
At this point we must generate a magic insn that appears to - modify the spill iterators, the stack pointer, and the frame - pointer. This would allow the most scheduling freedom. For now, - just hard stop. */ - emit_insn (gen_blockage ()); - } - - if (cfun->machine->ia64_eh_epilogue_sp) - emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); - else if (frame_pointer_needed) - { - insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); - RTX_FRAME_RELATED_P (insn) = 1; - } - else if (current_frame_info.total_size) - { - rtx offset, frame_size_rtx; - - frame_size_rtx = GEN_INT (current_frame_info.total_size); - if (CONST_OK_FOR_I (current_frame_info.total_size)) - offset = frame_size_rtx; - else - { - regno = next_scratch_gr_reg (); - offset = gen_rtx_REG (DImode, regno); - emit_move_insn (offset, frame_size_rtx); - } - - insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, - offset)); - - RTX_FRAME_RELATED_P (insn) = 1; - if (GET_CODE (offset) != CONST_INT) - { - REG_NOTES (insn) - = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, - gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - gen_rtx_PLUS (DImode, - stack_pointer_rtx, - frame_size_rtx)), - REG_NOTES (insn)); - } - } - - if (cfun->machine->ia64_eh_epilogue_bsp) - emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); - - if (! sibcall_p) - emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); - else - { - int fp = GR_REG (2); - /* We need a throw away register here, r0 and r1 are reserved, so r2 is the - first available call clobbered register. If there was a frame_pointer - register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM, - so we have to make sure we're using the string "r2" when emitting - the register name for the assembler. */ - if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2)) - fp = HARD_FRAME_POINTER_REGNUM; - - /* We must emit an alloc to force the input registers to become output - registers. Otherwise, if the callee tries to pass its parameters - through to another call without an intervening alloc, then these - values get lost. */ - /* ??? We don't need to preserve all input registers. We only need to - preserve those input registers used as arguments to the sibling call. - It is unclear how to compute that number here. */ - if (current_frame_info.n_input_regs != 0) - { - rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); - insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), - const0_rtx, const0_rtx, - n_inputs, const0_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - } - } -} - -/* Return 1 if br.ret can do all the work required to return from a - function. */ - -int -ia64_direct_return (void) -{ - if (reload_completed && ! frame_pointer_needed) - { - ia64_compute_frame_size (get_frame_size ()); - - return (current_frame_info.total_size == 0 - && current_frame_info.n_spilled == 0 - && current_frame_info.reg_save_b0 == 0 - && current_frame_info.reg_save_pr == 0 - && current_frame_info.reg_save_ar_pfs == 0 - && current_frame_info.reg_save_ar_unat == 0 - && current_frame_info.reg_save_ar_lc == 0); - } - return 0; -} - -/* Return the magic cookie that we use to hold the return address - during early compilation. */ - -rtx -ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED) -{ - if (count != 0) - return NULL; - return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); -} - -/* Split this value after reload, now that we know where the return - address is saved. 
*/ - -void -ia64_split_return_addr_rtx (rtx dest) -{ - rtx src; - - if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) - { - if (current_frame_info.reg_save_b0 != 0) - src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); - else - { - HOST_WIDE_INT off; - unsigned int regno; - - /* Compute offset from CFA for BR0. */ - /* ??? Must be kept in sync with ia64_expand_prologue. */ - off = (current_frame_info.spill_cfa_off - + current_frame_info.spill_size); - for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) - if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) - off -= 8; - - /* Convert CFA offset to a register based offset. */ - if (frame_pointer_needed) - src = hard_frame_pointer_rtx; - else - { - src = stack_pointer_rtx; - off += current_frame_info.total_size; - } - - /* Load address into scratch register. */ - if (CONST_OK_FOR_I (off)) - emit_insn (gen_adddi3 (dest, src, GEN_INT (off))); - else - { - emit_move_insn (dest, GEN_INT (off)); - emit_insn (gen_adddi3 (dest, src, dest)); - } - - src = gen_rtx_MEM (Pmode, dest); - } - } - else - src = gen_rtx_REG (DImode, BR_REG (0)); - - emit_move_insn (dest, src); -} - -int -ia64_hard_regno_rename_ok (int from, int to) -{ - /* Don't clobber any of the registers we reserved for the prologue. */ - if (to == current_frame_info.reg_fp - || to == current_frame_info.reg_save_b0 - || to == current_frame_info.reg_save_pr - || to == current_frame_info.reg_save_ar_pfs - || to == current_frame_info.reg_save_ar_unat - || to == current_frame_info.reg_save_ar_lc) - return 0; - - if (from == current_frame_info.reg_fp - || from == current_frame_info.reg_save_b0 - || from == current_frame_info.reg_save_pr - || from == current_frame_info.reg_save_ar_pfs - || from == current_frame_info.reg_save_ar_unat - || from == current_frame_info.reg_save_ar_lc) - return 0; - - /* Don't use output registers outside the register frame. */ - if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) - return 0; - - /* Retain even/oddness on predicate register pairs. */ - if (PR_REGNO_P (from) && PR_REGNO_P (to)) - return (from & 1) == (to & 1); - - return 1; -} - -/* Target hook for assembling integer objects. Handle word-sized - aligned objects and detect the cases when @fptr is needed. */ - -static bool -ia64_assemble_integer (rtx x, unsigned int size, int aligned_p) -{ - if (size == POINTER_SIZE / BITS_PER_UNIT - && !(TARGET_NO_PIC || TARGET_AUTO_PIC) - && GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_FUNCTION_P (x)) - { - static const char * const directive[2][2] = { - /* 64-bit pointer */ /* 32-bit pointer */ - { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ - { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ - }; - fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file); - output_addr_const (asm_out_file, x); - fputs (")\n", asm_out_file); - return true; - } - return default_assemble_integer (x, size, aligned_p); -} - -/* Emit the function prologue. */ - -static void -ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) -{ - int mask, grsave, grsave_prev; - - if (current_frame_info.need_regstk) - fprintf (file, "\t.regstk %d, %d, %d, %d\n", - current_frame_info.n_input_regs, - current_frame_info.n_local_regs, - current_frame_info.n_output_regs, - current_frame_info.n_rotate_regs); - - if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) - return; - - /* Emit the .prologue directive. 
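   Its mask argument flags which of rp, ar.pfs, psp and pr were saved in
   consecutive general registers starting at the grsave operand (bits 8,
   4, 2 and 1 respectively, matching the tests below). An illustrative
   instance:

	.prologue 12, 34

   i.e. rp and ar.pfs live in two consecutive registers starting at
   register 34.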
*/ - - mask = 0; - grsave = grsave_prev = 0; - if (current_frame_info.reg_save_b0 != 0) - { - mask |= 8; - grsave = grsave_prev = current_frame_info.reg_save_b0; - } - if (current_frame_info.reg_save_ar_pfs != 0 - && (grsave_prev == 0 - || current_frame_info.reg_save_ar_pfs == grsave_prev + 1)) - { - mask |= 4; - if (grsave_prev == 0) - grsave = current_frame_info.reg_save_ar_pfs; - grsave_prev = current_frame_info.reg_save_ar_pfs; - } - if (current_frame_info.reg_fp != 0 - && (grsave_prev == 0 - || current_frame_info.reg_fp == grsave_prev + 1)) - { - mask |= 2; - if (grsave_prev == 0) - grsave = HARD_FRAME_POINTER_REGNUM; - grsave_prev = current_frame_info.reg_fp; - } - if (current_frame_info.reg_save_pr != 0 - && (grsave_prev == 0 - || current_frame_info.reg_save_pr == grsave_prev + 1)) - { - mask |= 1; - if (grsave_prev == 0) - grsave = current_frame_info.reg_save_pr; - } - - if (mask && TARGET_GNU_AS) - fprintf (file, "\t.prologue %d, %d\n", mask, - ia64_dbx_register_number (grsave)); - else - fputs ("\t.prologue\n", file); - - /* Emit a .spill directive, if necessary, to relocate the base of - the register spill area. */ - if (current_frame_info.spill_cfa_off != -16) - fprintf (file, "\t.spill %ld\n", - (long) (current_frame_info.spill_cfa_off - + current_frame_info.spill_size)); -} - -/* Emit the .body directive at the scheduled end of the prologue. */ - -static void -ia64_output_function_end_prologue (FILE *file) -{ - if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) - return; - - fputs ("\t.body\n", file); -} - -/* Emit the function epilogue. */ - -static void -ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, - HOST_WIDE_INT size ATTRIBUTE_UNUSED) -{ - int i; - - if (current_frame_info.reg_fp) - { - const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; - reg_names[HARD_FRAME_POINTER_REGNUM] - = reg_names[current_frame_info.reg_fp]; - reg_names[current_frame_info.reg_fp] = tmp; - } - if (! TARGET_REG_NAMES) - { - for (i = 0; i < current_frame_info.n_input_regs; i++) - reg_names[IN_REG (i)] = ia64_input_reg_names[i]; - for (i = 0; i < current_frame_info.n_local_regs; i++) - reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; - for (i = 0; i < current_frame_info.n_output_regs; i++) - reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; - } - - current_frame_info.initialized = 0; -} - -int -ia64_dbx_register_number (int regno) -{ - /* In ia64_expand_prologue we quite literally renamed the frame pointer - from its home at loc79 to something inside the register frame. We - must perform the same renumbering here for the debug info. 
*/ - if (current_frame_info.reg_fp) - { - if (regno == HARD_FRAME_POINTER_REGNUM) - regno = current_frame_info.reg_fp; - else if (regno == current_frame_info.reg_fp) - regno = HARD_FRAME_POINTER_REGNUM; - } - - if (IN_REGNO_P (regno)) - return 32 + regno - IN_REG (0); - else if (LOC_REGNO_P (regno)) - return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); - else if (OUT_REGNO_P (regno)) - return (32 + current_frame_info.n_input_regs - + current_frame_info.n_local_regs + regno - OUT_REG (0)); - else - return regno; -} - -void -ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain) -{ - rtx addr_reg, eight = GEN_INT (8); - - /* The Intel assembler requires that the global __ia64_trampoline symbol - be declared explicitly. */ - if (!TARGET_GNU_AS) - { - static bool declared_ia64_trampoline = false; - - if (!declared_ia64_trampoline) - { - declared_ia64_trampoline = true; - (*targetm.asm_out.globalize_label) (asm_out_file, - "__ia64_trampoline"); - } - } - - /* Make sure addresses are Pmode even if we are in ILP32 mode. */ - addr = convert_memory_address (Pmode, addr); - fnaddr = convert_memory_address (Pmode, fnaddr); - static_chain = convert_memory_address (Pmode, static_chain); - - /* Load up our iterator. */ - addr_reg = gen_reg_rtx (Pmode); - emit_move_insn (addr_reg, addr); - - /* The first two words are the fake descriptor: - __ia64_trampoline, ADDR+16. */ - emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), - gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline")); - emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); - - emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), - copy_to_reg (plus_constant (addr, 16))); - emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); - - /* The third word is the target descriptor. */ - emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr); - emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); - - /* The fourth word is the static chain. */ - emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain); -} - -/* Do any needed setup for a variadic function. CUM has not been updated - for the last named argument which has type TYPE and mode MODE. - - We generate the actual spill instructions during prologue generation. */ - -static void -ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, int * pretend_size, - int second_time ATTRIBUTE_UNUSED) -{ - CUMULATIVE_ARGS next_cum = *cum; - - /* Skip the current argument. */ - ia64_function_arg_advance (&next_cum, mode, type, 1); - - if (next_cum.words < MAX_ARGUMENT_SLOTS) - { - int n = MAX_ARGUMENT_SLOTS - next_cum.words; - *pretend_size = n * UNITS_PER_WORD; - cfun->machine->n_varargs = n; - } -} - -/* Check whether TYPE is a homogeneous floating point aggregate. If - it is, return the mode of the floating point type that appears - in all leaves. If it is not, return VOIDmode. - - An aggregate is a homogeneous floating point aggregate if all - fields/elements in it have the same floating point type (e.g., - SFmode). 128-bit quad-precision floats are excluded. - - Variable sized aggregates should never arrive here, since we should - have already decided to pass them by reference. Top-level zero-sized - aggregates are excluded because our parallels crash the middle-end.
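   For example (type names invented for illustration):

     struct rgb { float r, g, b; };	   an HFA of SFmode
     struct seg { double a[2]; };	   an HFA of DFmode
     struct mix { float f; double d; };	   not an HFA (mixed element modes)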
*/ - -static enum machine_mode -hfa_element_mode (tree type, bool nested) -{ - enum machine_mode element_mode = VOIDmode; - enum machine_mode mode; - enum tree_code code = TREE_CODE (type); - int know_element_mode = 0; - tree t; - - if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type)))) - return VOIDmode; - - switch (code) - { - case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: - case BOOLEAN_TYPE: case POINTER_TYPE: - case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: - case LANG_TYPE: case FUNCTION_TYPE: - return VOIDmode; - - /* Fortran complex types are supposed to be HFAs, so we need to handle - gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex - types though. */ - case COMPLEX_TYPE: - if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT - && TYPE_MODE (type) != TCmode) - return GET_MODE_INNER (TYPE_MODE (type)); - else - return VOIDmode; - - case REAL_TYPE: - /* We want to return VOIDmode for raw REAL_TYPEs, but the actual - mode if this is contained within an aggregate. */ - if (nested && TYPE_MODE (type) != TFmode) - return TYPE_MODE (type); - else - return VOIDmode; - - case ARRAY_TYPE: - return hfa_element_mode (TREE_TYPE (type), 1); - - case RECORD_TYPE: - case UNION_TYPE: - case QUAL_UNION_TYPE: - for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) - { - if (TREE_CODE (t) != FIELD_DECL) - continue; - - mode = hfa_element_mode (TREE_TYPE (t), 1); - if (know_element_mode) - { - if (mode != element_mode) - return VOIDmode; - } - else if (GET_MODE_CLASS (mode) != MODE_FLOAT) - return VOIDmode; - else - { - know_element_mode = 1; - element_mode = mode; - } - } - return element_mode; - - default: - /* If we reach here, we probably have some front-end specific type - that the backend doesn't know about. This can happen via the - aggregate_value_p call in init_function_start. All we can do is - ignore unknown tree types. */ - return VOIDmode; - } - - return VOIDmode; -} - -/* Return the number of words required to hold a quantity of TYPE and MODE - when passed as an argument. */ -static int -ia64_function_arg_words (tree type, enum machine_mode mode) -{ - int words; - - if (mode == BLKmode) - words = int_size_in_bytes (type); - else - words = GET_MODE_SIZE (mode); - - return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */ -} - -/* Return the number of registers that should be skipped so the current - argument (described by TYPE and WORDS) will be properly aligned. - - Integer and float arguments larger than 8 bytes start at the next - even boundary. Aggregates larger than 8 bytes start at the next - even boundary if the aggregate has 16 byte alignment. Note that - in the 32-bit ABI, TImode and TFmode have only 8-byte alignment - but are still to be aligned in registers. - - ??? The ABI does not specify how to handle aggregates with - alignment from 9 to 15 bytes, or greater than 16. We handle them - all as if they had 16 byte alignment. Such aggregates can occur - only if gcc extensions are used. */ -static int -ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words) -{ - if ((cum->words & 1) == 0) - return 0; - - if (type - && TREE_CODE (type) != INTEGER_TYPE - && TREE_CODE (type) != REAL_TYPE) - return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT; - else - return words > 1; -} - -/* Return rtx for register where argument is passed, or zero if it is passed - on the stack. */ -/* ??? 128-bit quad-precision floats are always passed in general - registers. 
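   (Being 16 bytes, such a value starts at an even argument slot, per
   the alignment rule above, and occupies two consecutive GR slots.)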
*/ - -rtx -ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type, - int named, int incoming) -{ - int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); - int words = ia64_function_arg_words (type, mode); - int offset = ia64_function_arg_offset (cum, type, words); - enum machine_mode hfa_mode = VOIDmode; - - /* If all argument slots are used, then it must go on the stack. */ - if (cum->words + offset >= MAX_ARGUMENT_SLOTS) - return 0; - - /* Check for and handle homogeneous FP aggregates. */ - if (type) - hfa_mode = hfa_element_mode (type, 0); - - /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas - and unprototyped hfas are passed specially. */ - if (hfa_mode != VOIDmode && (! cum->prototype || named)) - { - rtx loc[16]; - int i = 0; - int fp_regs = cum->fp_regs; - int int_regs = cum->words + offset; - int hfa_size = GET_MODE_SIZE (hfa_mode); - int byte_size; - int args_byte_size; - - /* If prototyped, pass it in FR regs then GR regs. - If not prototyped, pass it in both FR and GR regs. - - If this is an SFmode aggregate, then it is possible to run out of - FR regs while GR regs are still left. In that case, we pass the - remaining part in the GR regs. */ - - /* Fill the FP regs. We do this always. We stop if we reach the end - of the argument, the last FP register, or the last argument slot. */ - - byte_size = ((mode == BLKmode) - ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); - args_byte_size = int_regs * UNITS_PER_WORD; - offset = 0; - for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS - && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) - { - loc[i] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (hfa_mode, (FR_ARG_FIRST - + fp_regs)), - GEN_INT (offset)); - offset += hfa_size; - args_byte_size += hfa_size; - fp_regs++; - } - - /* If no prototype, then the whole thing must go in GR regs. */ - if (! cum->prototype) - offset = 0; - /* If this is an SFmode aggregate, then we might have some left over - that needs to go in GR regs. */ - else if (byte_size != offset) - int_regs += offset / UNITS_PER_WORD; - - /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ - - for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) - { - enum machine_mode gr_mode = DImode; - unsigned int gr_size; - - /* If we have an odd 4 byte hunk because we ran out of FR regs, - then this goes in a GR reg left adjusted/little endian, right - adjusted/big endian. */ - /* ??? Currently this is handled wrong, because 4-byte hunks are - always right adjusted/little endian. */ - if (offset & 0x4) - gr_mode = SImode; - /* If we have an even 4 byte hunk because the aggregate is a - multiple of 4 bytes in size, then this goes in a GR reg right - adjusted/little endian. */ - else if (byte_size - offset == 4) - gr_mode = SImode; - - loc[i] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (gr_mode, (basereg - + int_regs)), - GEN_INT (offset)); - - gr_size = GET_MODE_SIZE (gr_mode); - offset += gr_size; - if (gr_size == UNITS_PER_WORD - || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0)) - int_regs++; - else if (gr_size > UNITS_PER_WORD) - int_regs += gr_size / UNITS_PER_WORD; - } - return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); - } - - /* Integral and aggregates go in general registers. If we have run out of - FR registers, then FP values must also go in general registers. This can - happen when we have a SFmode HFA. */ - else if (mode == TFmode || mode == TCmode - || (! 
FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) - { - int byte_size = ((mode == BLKmode) - ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); - if (BYTES_BIG_ENDIAN - && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) - && byte_size < UNITS_PER_WORD - && byte_size > 0) - { - rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (DImode, - (basereg + cum->words - + offset)), - const0_rtx); - return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); - } - else - return gen_rtx_REG (mode, basereg + cum->words + offset); - - } - - /* If there is a prototype, then FP values go in a FR register when - named, and in a GR register when unnamed. */ - else if (cum->prototype) - { - if (named) - return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); - /* In big-endian mode, an anonymous SFmode value must be represented - as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force - the value into the high half of the general register. */ - else if (BYTES_BIG_ENDIAN && mode == SFmode) - return gen_rtx_PARALLEL (mode, - gen_rtvec (1, - gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (DImode, basereg + cum->words + offset), - const0_rtx))); - else - return gen_rtx_REG (mode, basereg + cum->words + offset); - } - /* If there is no prototype, then FP values go in both FR and GR - registers. */ - else - { - /* See comment above. */ - enum machine_mode inner_mode = - (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode; - - rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (mode, (FR_ARG_FIRST - + cum->fp_regs)), - const0_rtx); - rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (inner_mode, - (basereg + cum->words - + offset)), - const0_rtx); - - return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); - } -} - -/* Return number of bytes, at the beginning of the argument, that must be - put in registers. 0 is the argument is entirely in registers or entirely - in memory. */ - -static int -ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, bool named ATTRIBUTE_UNUSED) -{ - int words = ia64_function_arg_words (type, mode); - int offset = ia64_function_arg_offset (cum, type, words); - - /* If all argument slots are used, then it must go on the stack. */ - if (cum->words + offset >= MAX_ARGUMENT_SLOTS) - return 0; - - /* It doesn't matter whether the argument goes in FR or GR regs. If - it fits within the 8 argument slots, then it goes entirely in - registers. If it extends past the last argument slot, then the rest - goes on the stack. */ - - if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) - return 0; - - return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD; -} - -/* Update CUM to point after this argument. This is patterned after - ia64_function_arg. */ - -void -ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, int named) -{ - int words = ia64_function_arg_words (type, mode); - int offset = ia64_function_arg_offset (cum, type, words); - enum machine_mode hfa_mode = VOIDmode; - - /* If all arg slots are already full, then there is nothing to do. */ - if (cum->words >= MAX_ARGUMENT_SLOTS) - return; - - cum->words += words + offset; - - /* Check for and handle homogeneous FP aggregates. */ - if (type) - hfa_mode = hfa_element_mode (type, 0); - - /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas - and unprototyped hfas are passed specially. */ - if (hfa_mode != VOIDmode && (! 
cum->prototype || named)) - { - int fp_regs = cum->fp_regs; - /* This is the original value of cum->words + offset. */ - int int_regs = cum->words - words; - int hfa_size = GET_MODE_SIZE (hfa_mode); - int byte_size; - int args_byte_size; - - /* If prototyped, pass it in FR regs then GR regs. - If not prototyped, pass it in both FR and GR regs. - - If this is an SFmode aggregate, then it is possible to run out of - FR regs while GR regs are still left. In that case, we pass the - remaining part in the GR regs. */ - - /* Fill the FP regs. We do this always. We stop if we reach the end - of the argument, the last FP register, or the last argument slot. */ - - byte_size = ((mode == BLKmode) - ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); - args_byte_size = int_regs * UNITS_PER_WORD; - offset = 0; - for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS - && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) - { - offset += hfa_size; - args_byte_size += hfa_size; - fp_regs++; - } - - cum->fp_regs = fp_regs; - } - - /* Integral and aggregates go in general registers. So do TFmode FP values. - If we have run out of FR registers, then other FP values must also go in - general registers. This can happen when we have a SFmode HFA. */ - else if (mode == TFmode || mode == TCmode - || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) - cum->int_regs = cum->words; - - /* If there is a prototype, then FP values go in a FR register when - named, and in a GR register when unnamed. */ - else if (cum->prototype) - { - if (! named) - cum->int_regs = cum->words; - else - /* ??? Complex types should not reach here. */ - cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); - } - /* If there is no prototype, then FP values go in both FR and GR - registers. */ - else - { - /* ??? Complex types should not reach here. */ - cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); - cum->int_regs = cum->words; - } -} - -/* Arguments with alignment larger than 8 bytes start at the next even - boundary. On ILP32 HPUX, TFmode arguments start on next even boundary - even though their normal alignment is 8 bytes. See ia64_function_arg. */ - -int -ia64_function_arg_boundary (enum machine_mode mode, tree type) -{ - - if (mode == TFmode && TARGET_HPUX && TARGET_ILP32) - return PARM_BOUNDARY * 2; - - if (type) - { - if (TYPE_ALIGN (type) > PARM_BOUNDARY) - return PARM_BOUNDARY * 2; - else - return PARM_BOUNDARY; - } - - if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY) - return PARM_BOUNDARY * 2; - else - return PARM_BOUNDARY; -} - -/* True if it is OK to do sibling call optimization for the specified - call expression EXP. DECL will be the called function, or NULL if - this is an indirect call. */ -static bool -ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) -{ - /* We can't perform a sibcall if the current function has the syscall_linkage - attribute. */ - if (lookup_attribute ("syscall_linkage", - TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) - return false; - - /* We must always return with our current GP. This means we can - only sibcall to functions defined in the current module. */ - return decl && (*targetm.binds_local_p) (decl); -} - - -/* Implement va_arg. */ - -static tree -ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) -{ - /* Variable sized types are passed by reference. 
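A worked example for ia64_arg_partial_bytes above, anchored here so that function is not split; the sketch assumes MAX_ARGUMENT_SLOTS is 8 and UNITS_PER_WORD is 8, the values this port uses, and partial_bytes is an invented name:

#include <assert.h>

#define MAX_ARGUMENT_SLOTS 8
#define UNITS_PER_WORD     8

// Mirror of the register/stack split above: CUM_WORDS + OFFSET slots
// are already used, WORDS more are wanted.
static int
partial_bytes (int cum_words, int offset, int words)
{
  if (cum_words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;                               // entirely on the stack
  if (words + cum_words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;                               // entirely in registers
  return (MAX_ARGUMENT_SLOTS - cum_words - offset) * UNITS_PER_WORD;
}

int
main (void)
{
  assert (partial_bytes (6, 0, 4) == 16);  // slots 6-7 in regs, 16 bytes
  assert (partial_bytes (8, 0, 2) == 0);   // no slots left: all stacked
  assert (partial_bytes (2, 0, 3) == 0);   // fits: all in registers
  return 0;
}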
*/ - if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) - { - tree ptrtype = build_pointer_type (type); - tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); - return build_va_arg_indirect_ref (addr); - } - - /* Aggregate arguments with alignment larger than 8 bytes start at - the next even boundary. Integer and floating point arguments - do so if they are larger than 8 bytes, whether or not they are - also aligned larger than 8 bytes. */ - if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE) - ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) - { - tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist, - build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1)); - t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, - build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD)); - t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); - gimplify_and_add (t, pre_p); - } - - return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); -} - -/* Return 1 if function return value returned in memory. Return 0 if it is - in a register. */ - -static bool -ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED) -{ - enum machine_mode mode; - enum machine_mode hfa_mode; - HOST_WIDE_INT byte_size; - - mode = TYPE_MODE (valtype); - byte_size = GET_MODE_SIZE (mode); - if (mode == BLKmode) - { - byte_size = int_size_in_bytes (valtype); - if (byte_size < 0) - return true; - } - - /* Hfa's with up to 8 elements are returned in the FP argument registers. */ - - hfa_mode = hfa_element_mode (valtype, 0); - if (hfa_mode != VOIDmode) - { - int hfa_size = GET_MODE_SIZE (hfa_mode); - - if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) - return true; - else - return false; - } - else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) - return true; - else - return false; -} - -/* Return rtx for register that holds the function return value. */ - -rtx -ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED) -{ - enum machine_mode mode; - enum machine_mode hfa_mode; - - mode = TYPE_MODE (valtype); - hfa_mode = hfa_element_mode (valtype, 0); - - if (hfa_mode != VOIDmode) - { - rtx loc[8]; - int i; - int hfa_size; - int byte_size; - int offset; - - hfa_size = GET_MODE_SIZE (hfa_mode); - byte_size = ((mode == BLKmode) - ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); - offset = 0; - for (i = 0; offset < byte_size; i++) - { - loc[i] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), - GEN_INT (offset)); - offset += hfa_size; - } - return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); - } - else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode) - return gen_rtx_REG (mode, FR_ARG_FIRST); - else - { - bool need_parallel = false; - - /* In big-endian mode, we need to manage the layout of aggregates - in the registers so that we get the bits properly aligned in - the highpart of the registers. */ - if (BYTES_BIG_ENDIAN - && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) - need_parallel = true; - - /* Something like struct S { long double x; char a[0] } is not an - HFA structure, and therefore doesn't go in fp registers. But - the middle-end will give it XFmode anyway, and XFmode values - don't normally fit in integer registers. So we need to smuggle - the value inside a parallel. 
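The PLUS_EXPR/BIT_AND_EXPR pair built just above is the usual align-up idiom, valist = (valist + 15) & -16, since 2 * UNITS_PER_WORD is 16 here. A plain-C sketch of the same computation (align16 is an invented name):

#include <stdint.h>

// Round a pointer up to the next 16-byte boundary, as the gimplified
// sequence above does for over-aligned va_arg values.
static void *
align16 (void *p)
{
  return (void *) (((uintptr_t) p + 15) & ~(uintptr_t) 15);
}

int
main (void)
{
  char buf[32];
  return ((uintptr_t) align16 (buf + 1) % 16 == 0) ? 0 : 1;
}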
*/ - else if (mode == XFmode || mode == XCmode || mode == RFmode) - need_parallel = true; - - if (need_parallel) - { - rtx loc[8]; - int offset; - int bytesize; - int i; - - offset = 0; - bytesize = int_size_in_bytes (valtype); - /* An empty PARALLEL is invalid here, but the return value - doesn't matter for empty structs. */ - if (bytesize == 0) - return gen_rtx_REG (mode, GR_RET_FIRST); - for (i = 0; offset < bytesize; i++) - { - loc[i] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (DImode, - GR_RET_FIRST + i), - GEN_INT (offset)); - offset += UNITS_PER_WORD; - } - return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); - } - - return gen_rtx_REG (mode, GR_RET_FIRST); - } -} - -/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. - We need to emit DTP-relative relocations. */ - -static void -ia64_output_dwarf_dtprel (FILE *file, int size, rtx x) -{ - gcc_assert (size == 4 || size == 8); - if (size == 4) - fputs ("\tdata4.ua\t@dtprel(", file); - else - fputs ("\tdata8.ua\t@dtprel(", file); - output_addr_const (file, x); - fputs (")", file); -} - -/* Print a memory address as an operand to reference that memory location. */ - -/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps - also call this from ia64_print_operand for memory addresses. */ - -void -ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, - rtx address ATTRIBUTE_UNUSED) -{ -} - -/* Print an operand to an assembler instruction. - C Swap and print a comparison operator. - D Print an FP comparison operator. - E Print 32 - constant, for SImode shifts as extract. - e Print 64 - constant, for DImode rotates. - F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or - a floating point register emitted normally. - I Invert a predicate register by adding 1. - J Select the proper predicate register for a condition. - j Select the inverse predicate register for a condition. - O Append .acq for volatile load. - P Postincrement of a MEM. - Q Append .rel for volatile store. - S Shift amount for shladd instruction. - T Print an 8-bit sign extended number (K) as a 32-bit unsigned number - for Intel assembler. - U Print an 8-bit sign extended number (K) as a 64-bit unsigned number - for Intel assembler. - X A pair of floating point registers. - r Print register name, or constant 0 as r0. HP compatibility for - Linux kernel. - v Print vector constant value as an 8-byte integer value. */ - -void -ia64_print_operand (FILE * file, rtx x, int code) -{ - const char *str; - - switch (code) - { - case 0: - /* Handled below. 
*/ - break; - - case 'C': - { - enum rtx_code c = swap_condition (GET_CODE (x)); - fputs (GET_RTX_NAME (c), file); - return; - } - - case 'D': - switch (GET_CODE (x)) - { - case NE: - str = "neq"; - break; - case UNORDERED: - str = "unord"; - break; - case ORDERED: - str = "ord"; - break; - default: - str = GET_RTX_NAME (GET_CODE (x)); - break; - } - fputs (str, file); - return; - - case 'E': - fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); - return; - - case 'e': - fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); - return; - - case 'F': - if (x == CONST0_RTX (GET_MODE (x))) - str = reg_names [FR_REG (0)]; - else if (x == CONST1_RTX (GET_MODE (x))) - str = reg_names [FR_REG (1)]; - else - { - gcc_assert (GET_CODE (x) == REG); - str = reg_names [REGNO (x)]; - } - fputs (str, file); - return; - - case 'I': - fputs (reg_names [REGNO (x) + 1], file); - return; - - case 'J': - case 'j': - { - unsigned int regno = REGNO (XEXP (x, 0)); - if (GET_CODE (x) == EQ) - regno += 1; - if (code == 'j') - regno ^= 1; - fputs (reg_names [regno], file); - } - return; - - case 'O': - if (MEM_VOLATILE_P (x)) - fputs(".acq", file); - return; - - case 'P': - { - HOST_WIDE_INT value; - - switch (GET_CODE (XEXP (x, 0))) - { - default: - return; - - case POST_MODIFY: - x = XEXP (XEXP (XEXP (x, 0), 1), 1); - if (GET_CODE (x) == CONST_INT) - value = INTVAL (x); - else - { - gcc_assert (GET_CODE (x) == REG); - fprintf (file, ", %s", reg_names[REGNO (x)]); - return; - } - break; - - case POST_INC: - value = GET_MODE_SIZE (GET_MODE (x)); - break; - - case POST_DEC: - value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); - break; - } - - fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value); - return; - } - - case 'Q': - if (MEM_VOLATILE_P (x)) - fputs(".rel", file); - return; - - case 'S': - fprintf (file, "%d", exact_log2 (INTVAL (x))); - return; - - case 'T': - if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) - { - fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); - return; - } - break; - - case 'U': - if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) - { - const char *prefix = "0x"; - if (INTVAL (x) & 0x80000000) - { - fprintf (file, "0xffffffff"); - prefix = ""; - } - fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); - return; - } - break; - - case 'X': - { - unsigned int regno = REGNO (x); - fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]); - } - return; - - case 'r': - /* If this operand is the constant zero, write it as register zero. - Any register, zero, or CONST_INT value is OK here. */ - if (GET_CODE (x) == REG) - fputs (reg_names[REGNO (x)], file); - else if (x == CONST0_RTX (GET_MODE (x))) - fputs ("r0", file); - else if (GET_CODE (x) == CONST_INT) - output_addr_const (file, x); - else - output_operand_lossage ("invalid %%r value"); - return; - - case 'v': - gcc_assert (GET_CODE (x) == CONST_VECTOR); - x = simplify_subreg (DImode, x, GET_MODE (x), 0); - break; - - case '+': - { - const char *which; - - /* For conditional branches, returns or calls, substitute - sptk, dptk, dpnt, or spnt for %s. */ - x = find_reg_note (current_output_insn, REG_BR_PROB, 0); - if (x) - { - int pred_val = INTVAL (XEXP (x, 0)); - - /* Guess top and bottom 10% statically predicted. 
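Given that REG_BR_PROB_BASE is 10000 in GCC of this vintage, the cutoffs tested below are effectively the bottom 2% and top 2% rather than 10%. A mirror of the selection with the constant folded in (branch_hint is an invented name):

// .spnt/.dpnt hint not-taken statically/dynamically,
// .sptk/.dptk hint taken.
static const char *
branch_hint (int pred_val, int reliable)
{
  if (pred_val < 10000 / 50 && reliable)              // below 2%
    return ".spnt";
  else if (pred_val < 10000 / 2)                      // below 50%
    return ".dpnt";
  else if (pred_val < 10000 / 100 * 98 || !reliable)  // below 98%
    return ".dptk";
  else
    return ".sptk";
}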
*/ - if (pred_val < REG_BR_PROB_BASE / 50 - && br_prob_note_reliable_p (x)) - which = ".spnt"; - else if (pred_val < REG_BR_PROB_BASE / 2) - which = ".dpnt"; - else if (pred_val < REG_BR_PROB_BASE / 100 * 98 - || !br_prob_note_reliable_p (x)) - which = ".dptk"; - else - which = ".sptk"; - } - else if (GET_CODE (current_output_insn) == CALL_INSN) - which = ".sptk"; - else - which = ".dptk"; - - fputs (which, file); - return; - } - - case ',': - x = current_insn_predicate; - if (x) - { - unsigned int regno = REGNO (XEXP (x, 0)); - if (GET_CODE (x) == EQ) - regno += 1; - fprintf (file, "(%s) ", reg_names [regno]); - } - return; - - default: - output_operand_lossage ("ia64_print_operand: unknown code"); - return; - } - - switch (GET_CODE (x)) - { - /* This happens for the spill/restore instructions. */ - case POST_INC: - case POST_DEC: - case POST_MODIFY: - x = XEXP (x, 0); - /* ... fall through ... */ - - case REG: - fputs (reg_names [REGNO (x)], file); - break; - - case MEM: - { - rtx addr = XEXP (x, 0); - if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) - addr = XEXP (addr, 0); - fprintf (file, "[%s]", reg_names [REGNO (addr)]); - break; - } - - default: - output_addr_const (file, x); - break; - } - - return; -} - -/* Compute a (partial) cost for rtx X. Return true if the complete - cost has been computed, and false if subexpressions should be - scanned. In either case, *TOTAL contains the cost result. */ -/* ??? This is incomplete. */ - -static bool -ia64_rtx_costs (rtx x, int code, int outer_code, int *total) -{ - switch (code) - { - case CONST_INT: - switch (outer_code) - { - case SET: - *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1); - return true; - case PLUS: - if (CONST_OK_FOR_I (INTVAL (x))) - *total = 0; - else if (CONST_OK_FOR_J (INTVAL (x))) - *total = 1; - else - *total = COSTS_N_INSNS (1); - return true; - default: - if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x))) - *total = 0; - else - *total = COSTS_N_INSNS (1); - return true; - } - - case CONST_DOUBLE: - *total = COSTS_N_INSNS (1); - return true; - - case CONST: - case SYMBOL_REF: - case LABEL_REF: - *total = COSTS_N_INSNS (3); - return true; - - case MULT: - /* For multiplies wider than HImode, we have to go to the FPU, - which normally involves copies. Plus there's the latency - of the multiply itself, and the latency of the instructions to - transfer integer regs to FP regs. */ - /* ??? Check for FP mode. */ - if (GET_MODE_SIZE (GET_MODE (x)) > 2) - *total = COSTS_N_INSNS (10); - else - *total = COSTS_N_INSNS (2); - return true; - - case PLUS: - case MINUS: - case ASHIFT: - case ASHIFTRT: - case LSHIFTRT: - *total = COSTS_N_INSNS (1); - return true; - - case DIV: - case UDIV: - case MOD: - case UMOD: - /* We make divide expensive, so that divide-by-constant will be - optimized to a multiply. */ - *total = COSTS_N_INSNS (60); - return true; - - default: - return false; - } -} - -/* Calculate the cost of moving data from a register in class FROM to - one in class TO, using MODE. */ - -int -ia64_register_move_cost (enum machine_mode mode, enum reg_class from, - enum reg_class to) -{ - /* ADDL_REGS is the same as GR_REGS for movement purposes. */ - if (to == ADDL_REGS) - to = GR_REGS; - if (from == ADDL_REGS) - from = GR_REGS; - - /* All costs are symmetric, so reduce cases by putting the - lower number class as the destination. 
*/ - if (from < to) - { - enum reg_class tmp = to; - to = from, from = tmp; - } - - /* Moving from FR<->GR in XFmode must be more expensive than 2, - so that we get secondary memory reloads. Between FR_REGS, - we have to make this at least as expensive as MEMORY_MOVE_COST - to avoid spectacularly poor register class preferencing. */ - if (mode == XFmode || mode == RFmode) - { - if (to != GR_REGS || from != GR_REGS) - return MEMORY_MOVE_COST (mode, to, 0); - else - return 3; - } - - switch (to) - { - case PR_REGS: - /* Moving between PR registers takes two insns. */ - if (from == PR_REGS) - return 3; - /* Moving between PR and anything but GR is impossible. */ - if (from != GR_REGS) - return MEMORY_MOVE_COST (mode, to, 0); - break; - - case BR_REGS: - /* Moving between BR and anything but GR is impossible. */ - if (from != GR_REGS && from != GR_AND_BR_REGS) - return MEMORY_MOVE_COST (mode, to, 0); - break; - - case AR_I_REGS: - case AR_M_REGS: - /* Moving between AR and anything but GR is impossible. */ - if (from != GR_REGS) - return MEMORY_MOVE_COST (mode, to, 0); - break; - - case GR_REGS: - case FR_REGS: - case FP_REGS: - case GR_AND_FR_REGS: - case GR_AND_BR_REGS: - case ALL_REGS: - break; - - default: - gcc_unreachable (); - } - - return 2; -} - -/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS - to use when copying X into that class. */ - -enum reg_class -ia64_preferred_reload_class (rtx x, enum reg_class class) -{ - switch (class) - { - case FR_REGS: - case FP_REGS: - /* Don't allow volatile mem reloads into floating point registers. - This is defined to force reload to choose the r/m case instead - of the f/f case when reloading (set (reg fX) (mem/v)). */ - if (MEM_P (x) && MEM_VOLATILE_P (x)) - return NO_REGS; - - /* Force all unrecognized constants into the constant pool. */ - if (CONSTANT_P (x)) - return NO_REGS; - break; - - case AR_M_REGS: - case AR_I_REGS: - if (!OBJECT_P (x)) - return NO_REGS; - break; - - default: - break; - } - - return class; -} - -/* This function returns the register class required for a secondary - register when copying between one of the registers in CLASS, and X, - using MODE. A return value of NO_REGS means that no secondary register - is required. */ - -enum reg_class -ia64_secondary_reload_class (enum reg_class class, - enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) -{ - int regno = -1; - - if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) - regno = true_regnum (x); - - switch (class) - { - case BR_REGS: - case AR_M_REGS: - case AR_I_REGS: - /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global - interaction. We end up with two pseudos with overlapping lifetimes - both of which are equiv to the same constant, and both which need - to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end - changes depending on the path length, which means the qty_first_reg - check in make_regs_eqv can give different answers at different times. - At some point I'll probably need a reload_indi pattern to handle - this. - - We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we - wound up with a FP register from GR_AND_FR_REGS. Extend that to all - non-general registers for good measure. */ - if (regno >= 0 && ! GENERAL_REGNO_P (regno)) - return GR_REGS; - - /* This is needed if a pseudo used as a call_operand gets spilled to a - stack slot. */ - if (GET_CODE (x) == MEM) - return GR_REGS; - break; - - case FR_REGS: - case FP_REGS: - /* Need to go through general registers to get to other class regs. 
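A summary of the lattice ia64_register_move_cost above implements, after its canonicalizing swap (costs are symmetric):

  GR <-> GR, XFmode/RFmode       3
  FR <-> GR, XFmode/RFmode       MEMORY_MOVE_COST   forces memory reloads
  PR <-> PR                      3                  two insns, via a GR
  PR, BR or AR <-> non-GR        MEMORY_MOVE_COST   no direct copy path
  everything else                2

The FR case of the secondary-reload switch picks up the same theme: anything that cannot reach an FR directly is routed through a GR.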
*/ - if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) - return GR_REGS; - - /* This can happen when a paradoxical subreg is an operand to the - muldi3 pattern. */ - /* ??? This shouldn't be necessary after instruction scheduling is - enabled, because paradoxical subregs are not accepted by - register_operand when INSN_SCHEDULING is defined. Or alternatively, - stop the paradoxical subreg stupidity in the *_operand functions - in recog.c. */ - if (GET_CODE (x) == MEM - && (GET_MODE (x) == SImode || GET_MODE (x) == HImode - || GET_MODE (x) == QImode)) - return GR_REGS; - - /* This can happen because of the ior/and/etc patterns that accept FP - registers as operands. If the third operand is a constant, then it - needs to be reloaded into a FP register. */ - if (GET_CODE (x) == CONST_INT) - return GR_REGS; - - /* This can happen because of register elimination in a muldi3 insn. - E.g. `26107 * (unsigned long)&u'. */ - if (GET_CODE (x) == PLUS) - return GR_REGS; - break; - - case PR_REGS: - /* ??? This happens if we cse/gcse a BImode value across a call, - and the function has a nonlocal goto. This is because global - does not allocate call crossing pseudos to hard registers when - current_function_has_nonlocal_goto is true. This is relatively - common for C++ programs that use exceptions. To reproduce, - return NO_REGS and compile libstdc++. */ - if (GET_CODE (x) == MEM) - return GR_REGS; - - /* This can happen when we take a BImode subreg of a DImode value, - and that DImode value winds up in some non-GR register. */ - if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) - return GR_REGS; - break; - - default: - break; - } - - return NO_REGS; -} - - -/* Parse the -mfixed-range= option string. */ - -static void -fix_range (const char *const_str) -{ - int i, first, last; - char *str, *dash, *comma; - - /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and - REG2 are either register names or register numbers. The effect - of this option is to mark the registers in the range from REG1 to - REG2 as ``fixed'' so they won't be used by the compiler. This is - used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ - - i = strlen (const_str); - str = (char *) alloca (i + 1); - memcpy (str, const_str, i + 1); - - while (1) - { - dash = strchr (str, '-'); - if (!dash) - { - warning (0, "value of -mfixed-range must have form REG1-REG2"); - return; - } - *dash = '\0'; - - comma = strchr (dash + 1, ','); - if (comma) - *comma = '\0'; - - first = decode_reg_name (str); - if (first < 0) - { - warning (0, "unknown register name: %s", str); - return; - } - - last = decode_reg_name (dash + 1); - if (last < 0) - { - warning (0, "unknown register name: %s", dash + 1); - return; - } - - *dash = '-'; - - if (first > last) - { - warning (0, "%s-%s is an empty range", str, dash + 1); - return; - } - - for (i = first; i <= last; ++i) - fixed_regs[i] = call_used_regs[i] = 1; - - if (!comma) - break; - - *comma = ','; - str = comma + 1; - } -} - -/* Implement TARGET_HANDLE_OPTION. */ - -static bool -ia64_handle_option (size_t code, const char *arg, int value) -{ - switch (code) - { - case OPT_mfixed_range_: - fix_range (arg); - return true; - - case OPT_mtls_size_: - if (value != 14 && value != 22 && value != 64) - error ("bad value %<%s%> for -mtls-size= switch", arg); - return true; - - case OPT_mtune_: - { - static struct pta - { - const char *name; /* processor name or nickname. 
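For reference, the option syntax ia64_handle_option above accepts: -mfixed-range takes one or more comma-separated REG1-REG2 ranges, for example -mfixed-range=f32-f127 to keep generated code out of the upper floating-point bank (the kernel case the parser's comment mentions), and every register named becomes both fixed and call-used; -mtls-size accepts exactly 14, 22 or 64; -mtune accepts the alias-table names that follow, itanium, itanium1 and merced for Itanium 1, itanium2 and mckinley for Itanium 2.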
*/ - enum processor_type processor; - } - const processor_alias_table[] = - { - {"itanium", PROCESSOR_ITANIUM}, - {"itanium1", PROCESSOR_ITANIUM}, - {"merced", PROCESSOR_ITANIUM}, - {"itanium2", PROCESSOR_ITANIUM2}, - {"mckinley", PROCESSOR_ITANIUM2}, - }; - int const pta_size = ARRAY_SIZE (processor_alias_table); - int i; - - for (i = 0; i < pta_size; i++) - if (!strcmp (arg, processor_alias_table[i].name)) - { - ia64_tune = processor_alias_table[i].processor; - break; - } - if (i == pta_size) - error ("bad value %<%s%> for -mtune= switch", arg); - return true; - } - - default: - return true; - } -} - -/* Implement OVERRIDE_OPTIONS. */ - -void -ia64_override_options (void) -{ - if (TARGET_AUTO_PIC) - target_flags |= MASK_CONST_GP; - - if (TARGET_INLINE_SQRT == INL_MIN_LAT) - { - warning (0, "not yet implemented: latency-optimized inline square root"); - TARGET_INLINE_SQRT = INL_MAX_THR; - } - - ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; - flag_schedule_insns_after_reload = 0; - - ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; - - init_machine_status = ia64_init_machine_status; -} - -static struct machine_function * -ia64_init_machine_status (void) -{ - return ggc_alloc_cleared (sizeof (struct machine_function)); -} - -static enum attr_itanium_class ia64_safe_itanium_class (rtx); -static enum attr_type ia64_safe_type (rtx); - -static enum attr_itanium_class -ia64_safe_itanium_class (rtx insn) -{ - if (recog_memoized (insn) >= 0) - return get_attr_itanium_class (insn); - else - return ITANIUM_CLASS_UNKNOWN; -} - -static enum attr_type -ia64_safe_type (rtx insn) -{ - if (recog_memoized (insn) >= 0) - return get_attr_type (insn); - else - return TYPE_UNKNOWN; -} - -/* The following collection of routines emit instruction group stop bits as - necessary to avoid dependencies. */ - -/* Need to track some additional registers as far as serialization is - concerned so we can properly handle br.call and br.ret. We could - make these registers visible to gcc, but since these registers are - never explicitly used in gcc generated code, it seems wasteful to - do so (plus it would make the call and return patterns needlessly - complex). */ -#define REG_RP (BR_REG (0)) -#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) -/* This is used for volatile asms which may require a stop bit immediately - before and after them. */ -#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) -#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) -#define NUM_REGS (AR_UNAT_BIT_0 + 64) - -/* For each register, we keep track of how it has been written in the - current instruction group. - - If a register is written unconditionally (no qualifying predicate), - WRITE_COUNT is set to 2 and FIRST_PRED is ignored. - - If a register is written if its qualifying predicate P is true, we - set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register - may be written again by the complement of P (P^1) and when this happens, - WRITE_COUNT gets set to 2. - - The result of this is that whenever an insn attempts to write a register - whose WRITE_COUNT is two, we need to issue an insn group barrier first. - - If a predicate register is written by a floating-point insn, we set - WRITTEN_BY_FP to true. - - If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND - to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. 
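A concrete run of that state machine, with the usual compare-generated predicate pair (illustrative; ia64 assembler comments start with //):

  (p6) mov r8 = 1    // r8: write_count 1, first_pred p6
  (p7) mov r8 = 2    // p7 is p6's complement: allowed, write_count 2
       ;;            // any further write of r8 needs this stop bit
       mov r8 = 3

Had the second write used an unrelated predicate, say p8, the complement test would have demanded the stop bit one instruction earlier.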
*/ - -struct reg_write_state -{ - unsigned int write_count : 2; - unsigned int first_pred : 16; - unsigned int written_by_fp : 1; - unsigned int written_by_and : 1; - unsigned int written_by_or : 1; -}; - -/* Cumulative info for the current instruction group. */ -struct reg_write_state rws_sum[NUM_REGS]; -/* Info for the current instruction. This gets copied to rws_sum after a - stop bit is emitted. */ -struct reg_write_state rws_insn[NUM_REGS]; - -/* Indicates whether this is the first instruction after a stop bit, - in which case we don't need another stop bit. Without this, - ia64_variable_issue will die when scheduling an alloc. */ -static int first_instruction; - -/* Misc flags needed to compute RAW/WAW dependencies while we are traversing - RTL for one instruction. */ -struct reg_flags -{ - unsigned int is_write : 1; /* Is register being written? */ - unsigned int is_fp : 1; /* Is register used as part of an fp op? */ - unsigned int is_branch : 1; /* Is register used as part of a branch? */ - unsigned int is_and : 1; /* Is register used as part of and.orcm? */ - unsigned int is_or : 1; /* Is register used as part of or.andcm? */ - unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ -}; - -static void rws_update (struct reg_write_state *, int, struct reg_flags, int); -static int rws_access_regno (int, struct reg_flags, int); -static int rws_access_reg (rtx, struct reg_flags, int); -static void update_set_flags (rtx, struct reg_flags *); -static int set_src_needs_barrier (rtx, struct reg_flags, int); -static int rtx_needs_barrier (rtx, struct reg_flags, int); -static void init_insn_group_barriers (void); -static int group_barrier_needed (rtx); -static int safe_group_barrier_needed (rtx); - -/* Update *RWS for REGNO, which is being written by the current instruction, - with predicate PRED, and associated register flags in FLAGS. */ - -static void -rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred) -{ - if (pred) - rws[regno].write_count++; - else - rws[regno].write_count = 2; - rws[regno].written_by_fp |= flags.is_fp; - /* ??? Not tracking and/or across differing predicates. */ - rws[regno].written_by_and = flags.is_and; - rws[regno].written_by_or = flags.is_or; - rws[regno].first_pred = pred; -} - -/* Handle an access to register REGNO of type FLAGS using predicate register - PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates - a dependency with an earlier instruction in the same group. */ - -static int -rws_access_regno (int regno, struct reg_flags flags, int pred) -{ - int need_barrier = 0; - - gcc_assert (regno < NUM_REGS); - - if (! PR_REGNO_P (regno)) - flags.is_and = flags.is_or = 0; - - if (flags.is_write) - { - int write_count; - - /* One insn writes same reg multiple times? */ - gcc_assert (!rws_insn[regno].write_count); - - /* Update info for current instruction. */ - rws_update (rws_insn, regno, flags, pred); - write_count = rws_sum[regno].write_count; - - switch (write_count) - { - case 0: - /* The register has not been written yet. */ - rws_update (rws_sum, regno, flags, pred); - break; - - case 1: - /* The register has been written via a predicate. If this is - not a complementary predicate, then we need a barrier. */ - /* ??? This assumes that P and P+1 are always complementary - predicates for P even. 
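The assumption works because ia64 compare instructions target an even/odd predicate pair, so the complement of predicate register N is always N ^ 1: p6 pairs with p7, p8 with p9, and so on. That is why a 16-bit first_pred field and one XOR suffice, instead of tracking an arbitrary complement relation.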
*/ - if (flags.is_and && rws_sum[regno].written_by_and) - ; - else if (flags.is_or && rws_sum[regno].written_by_or) - ; - else if ((rws_sum[regno].first_pred ^ 1) != pred) - need_barrier = 1; - rws_update (rws_sum, regno, flags, pred); - break; - - case 2: - /* The register has been unconditionally written already. We - need a barrier. */ - if (flags.is_and && rws_sum[regno].written_by_and) - ; - else if (flags.is_or && rws_sum[regno].written_by_or) - ; - else - need_barrier = 1; - rws_sum[regno].written_by_and = flags.is_and; - rws_sum[regno].written_by_or = flags.is_or; - break; - - default: - gcc_unreachable (); - } - } - else - { - if (flags.is_branch) - { - /* Branches have several RAW exceptions that allow to avoid - barriers. */ - - if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) - /* RAW dependencies on branch regs are permissible as long - as the writer is a non-branch instruction. Since we - never generate code that uses a branch register written - by a branch instruction, handling this case is - easy. */ - return 0; - - if (REGNO_REG_CLASS (regno) == PR_REGS - && ! rws_sum[regno].written_by_fp) - /* The predicates of a branch are available within the - same insn group as long as the predicate was written by - something other than a floating-point instruction. */ - return 0; - } - - if (flags.is_and && rws_sum[regno].written_by_and) - return 0; - if (flags.is_or && rws_sum[regno].written_by_or) - return 0; - - switch (rws_sum[regno].write_count) - { - case 0: - /* The register has not been written yet. */ - break; - - case 1: - /* The register has been written via a predicate. If this is - not a complementary predicate, then we need a barrier. */ - /* ??? This assumes that P and P+1 are always complementary - predicates for P even. */ - if ((rws_sum[regno].first_pred ^ 1) != pred) - need_barrier = 1; - break; - - case 2: - /* The register has been unconditionally written already. We - need a barrier. */ - need_barrier = 1; - break; - - default: - gcc_unreachable (); - } - } - - return need_barrier; -} - -static int -rws_access_reg (rtx reg, struct reg_flags flags, int pred) -{ - int regno = REGNO (reg); - int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); - - if (n == 1) - return rws_access_regno (regno, flags, pred); - else - { - int need_barrier = 0; - while (--n >= 0) - need_barrier |= rws_access_regno (regno + n, flags, pred); - return need_barrier; - } -} - -/* Examine X, which is a SET rtx, and update the flags, the predicate, and - the condition, stored in *PFLAGS, *PPRED and *PCOND. */ - -static void -update_set_flags (rtx x, struct reg_flags *pflags) -{ - rtx src = SET_SRC (x); - - switch (GET_CODE (src)) - { - case CALL: - return; - - case IF_THEN_ELSE: - /* There are four cases here: - (1) The destination is (pc), in which case this is a branch, - nothing here applies. - (2) The destination is ar.lc, in which case this is a - doloop_end_internal, - (3) The destination is an fp register, in which case this is - an fselect instruction. - (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case - this is a check load. - In all cases, nothing we do in this function applies. */ - return; - - default: - if (COMPARISON_P (src) - && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0)))) - /* Set pflags->is_fp to 1 so that we know we're dealing - with a floating point comparison when processing the - destination of the SET. */ - pflags->is_fp = 1; - - /* Discover if this is a parallel comparison. 
We only handle - and.orcm and or.andcm at present, since we must retain a - strict inverse on the predicate pair. */ - else if (GET_CODE (src) == AND) - pflags->is_and = 1; - else if (GET_CODE (src) == IOR) - pflags->is_or = 1; - - break; - } -} - -/* Subroutine of rtx_needs_barrier; this function determines whether the - source of a given SET rtx found in X needs a barrier. FLAGS and PRED - are as in rtx_needs_barrier. COND is an rtx that holds the condition - for this insn. */ - -static int -set_src_needs_barrier (rtx x, struct reg_flags flags, int pred) -{ - int need_barrier = 0; - rtx dst; - rtx src = SET_SRC (x); - - if (GET_CODE (src) == CALL) - /* We don't need to worry about the result registers that - get written by subroutine call. */ - return rtx_needs_barrier (src, flags, pred); - else if (SET_DEST (x) == pc_rtx) - { - /* X is a conditional branch. */ - /* ??? This seems redundant, as the caller sets this bit for - all JUMP_INSNs. */ - if (!ia64_spec_check_src_p (src)) - flags.is_branch = 1; - return rtx_needs_barrier (src, flags, pred); - } - - if (ia64_spec_check_src_p (src)) - /* Avoid checking one register twice (in condition - and in 'then' section) for ldc pattern. */ - { - gcc_assert (REG_P (XEXP (src, 2))); - need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred); - - /* We process MEM below. */ - src = XEXP (src, 1); - } - - need_barrier |= rtx_needs_barrier (src, flags, pred); - - dst = SET_DEST (x); - if (GET_CODE (dst) == ZERO_EXTRACT) - { - need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); - need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); - } - return need_barrier; -} - -/* Handle an access to rtx X of type FLAGS using predicate register - PRED. Return 1 if this access creates a dependency with an earlier - instruction in the same group. */ - -static int -rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) -{ - int i, j; - int is_complemented = 0; - int need_barrier = 0; - const char *format_ptr; - struct reg_flags new_flags; - rtx cond; - - if (! x) - return 0; - - new_flags = flags; - - switch (GET_CODE (x)) - { - case SET: - update_set_flags (x, &new_flags); - need_barrier = set_src_needs_barrier (x, new_flags, pred); - if (GET_CODE (SET_SRC (x)) != CALL) - { - new_flags.is_write = 1; - need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); - } - break; - - case CALL: - new_flags.is_write = 0; - need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); - - /* Avoid multiple register writes, in case this is a pattern with - multiple CALL rtx. This avoids a failure in rws_access_reg. */ - if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count) - { - new_flags.is_write = 1; - need_barrier |= rws_access_regno (REG_RP, new_flags, pred); - need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); - need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); - } - break; - - case COND_EXEC: - /* X is a predicated instruction. */ - - cond = COND_EXEC_TEST (x); - gcc_assert (!pred); - need_barrier = rtx_needs_barrier (cond, flags, 0); - - if (GET_CODE (cond) == EQ) - is_complemented = 1; - cond = XEXP (cond, 0); - gcc_assert (GET_CODE (cond) == REG - && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS); - pred = REGNO (cond); - if (is_complemented) - ++pred; - - need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); - return need_barrier; - - case CLOBBER: - case USE: - /* Clobber & use are for earlier compiler-phases only. 
*/ - break; - - case ASM_OPERANDS: - case ASM_INPUT: - /* We always emit stop bits for traditional asms. We emit stop bits - for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ - if (GET_CODE (x) != ASM_OPERANDS - || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) - { - /* Avoid writing the register multiple times if we have multiple - asm outputs. This avoids a failure in rws_access_reg. */ - if (! rws_insn[REG_VOLATILE].write_count) - { - new_flags.is_write = 1; - rws_access_regno (REG_VOLATILE, new_flags, pred); - } - return 1; - } - - /* For all ASM_OPERANDS, we must traverse the vector of input operands. - We cannot just fall through here since then we would be confused - by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate - traditional asms unlike their normal usage. */ - - for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) - if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) - need_barrier = 1; - break; - - case PARALLEL: - for (i = XVECLEN (x, 0) - 1; i >= 0; --i) - { - rtx pat = XVECEXP (x, 0, i); - switch (GET_CODE (pat)) - { - case SET: - update_set_flags (pat, &new_flags); - need_barrier |= set_src_needs_barrier (pat, new_flags, pred); - break; - - case USE: - case CALL: - case ASM_OPERANDS: - need_barrier |= rtx_needs_barrier (pat, flags, pred); - break; - - case CLOBBER: - case RETURN: - break; - - default: - gcc_unreachable (); - } - } - for (i = XVECLEN (x, 0) - 1; i >= 0; --i) - { - rtx pat = XVECEXP (x, 0, i); - if (GET_CODE (pat) == SET) - { - if (GET_CODE (SET_SRC (pat)) != CALL) - { - new_flags.is_write = 1; - need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, - pred); - } - } - else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) - need_barrier |= rtx_needs_barrier (pat, flags, pred); - } - break; - - case SUBREG: - need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred); - break; - case REG: - if (REGNO (x) == AR_UNAT_REGNUM) - { - for (i = 0; i < 64; ++i) - need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); - } - else - need_barrier = rws_access_reg (x, flags, pred); - break; - - case MEM: - /* Find the regs used in memory address computation. */ - new_flags.is_write = 0; - need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); - break; - - case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: - case SYMBOL_REF: case LABEL_REF: case CONST: - break; - - /* Operators with side-effects. */ - case POST_INC: case POST_DEC: - gcc_assert (GET_CODE (XEXP (x, 0)) == REG); - - new_flags.is_write = 0; - need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); - new_flags.is_write = 1; - need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); - break; - - case POST_MODIFY: - gcc_assert (GET_CODE (XEXP (x, 0)) == REG); - - new_flags.is_write = 0; - need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); - need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); - new_flags.is_write = 1; - need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); - break; - - /* Handle common unary and binary ops for efficiency. 
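Note that the POST_INC/POST_DEC/POST_MODIFY cases above deliberately visit the address register twice, once with is_write clear, because the address is consumed, and once with is_write set, because the register is updated. For a post-increment load such as ld8 r3 = [r5], 8 the register r5 therefore takes part in both the RAW and the WAW bookkeeping of the group.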
*/ - case COMPARE: case PLUS: case MINUS: case MULT: case DIV: - case MOD: case UDIV: case UMOD: case AND: case IOR: - case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: - case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: - case NE: case EQ: case GE: case GT: case LE: - case LT: case GEU: case GTU: case LEU: case LTU: - need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); - need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); - break; - - case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: - case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: - case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: - case SQRT: case FFS: case POPCOUNT: - need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); - break; - - case VEC_SELECT: - /* VEC_SELECT's second argument is a PARALLEL with integers that - describe the elements selected. On ia64, those integers are - always constants. Avoid walking the PARALLEL so that we don't - get confused with "normal" parallels and then die. */ - need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); - break; - - case UNSPEC: - switch (XINT (x, 1)) - { - case UNSPEC_LTOFF_DTPMOD: - case UNSPEC_LTOFF_DTPREL: - case UNSPEC_DTPREL: - case UNSPEC_LTOFF_TPREL: - case UNSPEC_TPREL: - case UNSPEC_PRED_REL_MUTEX: - case UNSPEC_PIC_CALL: - case UNSPEC_MF: - case UNSPEC_FETCHADD_ACQ: - case UNSPEC_BSP_VALUE: - case UNSPEC_FLUSHRS: - case UNSPEC_BUNDLE_SELECTOR: - break; - - case UNSPEC_GR_SPILL: - case UNSPEC_GR_RESTORE: - { - HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); - HOST_WIDE_INT bit = (offset >> 3) & 63; - - need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); - new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); - need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, - new_flags, pred); - break; - } - - case UNSPEC_FR_SPILL: - case UNSPEC_FR_RESTORE: - case UNSPEC_GETF_EXP: - case UNSPEC_SETF_EXP: - case UNSPEC_ADDP4: - case UNSPEC_FR_SQRT_RECIP_APPROX: - case UNSPEC_LDA: - case UNSPEC_LDS: - case UNSPEC_LDSA: - case UNSPEC_CHKACLR: - case UNSPEC_CHKS: - need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); - break; - - case UNSPEC_FR_RECIP_APPROX: - case UNSPEC_SHRP: - case UNSPEC_COPYSIGN: - need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); - need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); - break; - - case UNSPEC_CMPXCHG_ACQ: - need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); - need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); - break; - - default: - gcc_unreachable (); - } - break; - - case UNSPEC_VOLATILE: - switch (XINT (x, 1)) - { - case UNSPECV_ALLOC: - /* Alloc must always be the first instruction of a group. - We force this by always returning true. */ - /* ??? We might get better scheduling if we explicitly check for - input/local/output register dependencies, and modify the - scheduler so that alloc is always reordered to the start of - the current group. We could then eliminate all of the - first_instruction code. 
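Worked numbers for the UNSPEC_GR_SPILL/UNSPEC_GR_RESTORE case above: ar.unat holds one NaT-collection bit per 8-byte spill slot, selected by bits 3..8 of the spill offset, hence (offset >> 3) & 63. A quick self-check of that arithmetic:

#include <assert.h>

int
main (void)
{
  assert (((24 >> 3) & 63) == 3);          // offset 24 tracks bit 3
  assert ((((24 + 512) >> 3) & 63) == 3);  // slots 512 bytes apart map
                                           // to the same bit
  return 0;
}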
*/ - rws_access_regno (AR_PFS_REGNUM, flags, pred); - - new_flags.is_write = 1; - rws_access_regno (REG_AR_CFM, new_flags, pred); - return 1; - - case UNSPECV_SET_BSP: - need_barrier = 1; - break; - - case UNSPECV_BLOCKAGE: - case UNSPECV_INSN_GROUP_BARRIER: - case UNSPECV_BREAK: - case UNSPECV_PSAC_ALL: - case UNSPECV_PSAC_NORMAL: - return 0; - - default: - gcc_unreachable (); - } - break; - - case RETURN: - new_flags.is_write = 0; - need_barrier = rws_access_regno (REG_RP, flags, pred); - need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); - - new_flags.is_write = 1; - need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); - need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); - break; - - default: - format_ptr = GET_RTX_FORMAT (GET_CODE (x)); - for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) - switch (format_ptr[i]) - { - case '0': /* unused field */ - case 'i': /* integer */ - case 'n': /* note */ - case 'w': /* wide integer */ - case 's': /* pointer to string */ - case 'S': /* optional pointer to string */ - break; - - case 'e': - if (rtx_needs_barrier (XEXP (x, i), flags, pred)) - need_barrier = 1; - break; - - case 'E': - for (j = XVECLEN (x, i) - 1; j >= 0; --j) - if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) - need_barrier = 1; - break; - - default: - gcc_unreachable (); - } - break; - } - return need_barrier; -} - -/* Clear out the state for group_barrier_needed at the start of a - sequence of insns. */ - -static void -init_insn_group_barriers (void) -{ - memset (rws_sum, 0, sizeof (rws_sum)); - first_instruction = 1; -} - -/* Given the current state, determine whether a group barrier (a stop bit) is - necessary before INSN. Return nonzero if so. This modifies the state to - include the effects of INSN as a side-effect. */ - -static int -group_barrier_needed (rtx insn) -{ - rtx pat; - int need_barrier = 0; - struct reg_flags flags; - - memset (&flags, 0, sizeof (flags)); - switch (GET_CODE (insn)) - { - case NOTE: - break; - - case BARRIER: - /* A barrier doesn't imply an instruction group boundary. */ - break; - - case CODE_LABEL: - memset (rws_insn, 0, sizeof (rws_insn)); - return 1; - - case CALL_INSN: - flags.is_branch = 1; - flags.is_sibcall = SIBLING_CALL_P (insn); - memset (rws_insn, 0, sizeof (rws_insn)); - - /* Don't bundle a call following another call. */ - if ((pat = prev_active_insn (insn)) - && GET_CODE (pat) == CALL_INSN) - { - need_barrier = 1; - break; - } - - need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); - break; - - case JUMP_INSN: - if (!ia64_spec_check_p (insn)) - flags.is_branch = 1; - - /* Don't bundle a jump following a call. */ - if ((pat = prev_active_insn (insn)) - && GET_CODE (pat) == CALL_INSN) - { - need_barrier = 1; - break; - } - /* FALLTHRU */ - - case INSN: - if (GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (insn)) == CLOBBER) - /* Don't care about USE and CLOBBER "insns"---those are used to - indicate to the optimizer that it shouldn't get rid of - certain operations. */ - break; - - pat = PATTERN (insn); - - /* Ug. Hack hacks hacked elsewhere. */ - switch (recog_memoized (insn)) - { - /* We play dependency tricks with the epilogue in order - to get proper schedules. Undo this for dv analysis. */ - case CODE_FOR_epilogue_deallocate_stack: - case CODE_FOR_prologue_allocate_stack: - pat = XVECEXP (pat, 0, 0); - break; - - /* The pattern we use for br.cloop confuses the code above. - The second element of the vector is representative. 
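The default case above is the stock idiom for walking an arbitrary rtx by its format string, where 'e' marks a subexpression and 'E' a vector of them. A freestanding miniature of the same shape, with an invented toy node type and only the 'e' code modeled:

struct node
{
  const char *fmt;       // one letter per operand, like GET_RTX_FORMAT
  struct node *kids[4];  // children occupying the 'e' slots
};

// OR the visitor's verdict over X and every subexpression, the way
// the default case above accumulates need_barrier.
static int
walk (struct node *x, int (*visit) (struct node *))
{
  int hit = visit (x);
  for (int i = 0; x->fmt[i]; i++)
    if (x->fmt[i] == 'e')
      hit |= walk (x->kids[i], visit);
  return hit;
}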
*/ - case CODE_FOR_doloop_end_internal: - pat = XVECEXP (pat, 0, 1); - break; - - /* Doesn't generate code. */ - case CODE_FOR_pred_rel_mutex: - case CODE_FOR_prologue_use: - return 0; - - default: - break; - } - - memset (rws_insn, 0, sizeof (rws_insn)); - need_barrier = rtx_needs_barrier (pat, flags, 0); - - /* Check to see if the previous instruction was a volatile - asm. */ - if (! need_barrier) - need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); - break; - - default: - gcc_unreachable (); - } - - if (first_instruction && INSN_P (insn) - && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER) - { - need_barrier = 0; - first_instruction = 0; - } - - return need_barrier; -} - -/* Like group_barrier_needed, but do not clobber the current state. */ - -static int -safe_group_barrier_needed (rtx insn) -{ - struct reg_write_state rws_saved[NUM_REGS]; - int saved_first_instruction; - int t; - - memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved); - saved_first_instruction = first_instruction; - - t = group_barrier_needed (insn); - - memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved); - first_instruction = saved_first_instruction; - - return t; -} - -/* Scan the current function and insert stop bits as necessary to - eliminate dependencies. This function assumes that a final - instruction scheduling pass has been run which has already - inserted most of the necessary stop bits. This function only - inserts new ones at basic block boundaries, since these are - invisible to the scheduler. */ - -static void -emit_insn_group_barriers (FILE *dump) -{ - rtx insn; - rtx last_label = 0; - int insns_since_last_label = 0; - - init_insn_group_barriers (); - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (GET_CODE (insn) == CODE_LABEL) - { - if (insns_since_last_label) - last_label = insn; - insns_since_last_label = 0; - } - else if (GET_CODE (insn) == NOTE - && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) - { - if (insns_since_last_label) - last_label = insn; - insns_since_last_label = 0; - } - else if (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE - && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) - { - init_insn_group_barriers (); - last_label = 0; - } - else if (INSN_P (insn)) - { - insns_since_last_label = 1; - - if (group_barrier_needed (insn)) - { - if (last_label) - { - if (dump) - fprintf (dump, "Emitting stop before label %d\n", - INSN_UID (last_label)); - emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); - insn = last_label; - - init_insn_group_barriers (); - last_label = 0; - } - } - } - } -} - -/* Like emit_insn_group_barriers, but run if no final scheduling pass was run. - This function has to emit all necessary group barriers. */ - -static void -emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) -{ - rtx insn; - - init_insn_group_barriers (); - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (GET_CODE (insn) == BARRIER) - { - rtx last = prev_active_insn (insn); - - if (! 
last) - continue; - if (GET_CODE (last) == JUMP_INSN - && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) - last = prev_active_insn (last); - if (recog_memoized (last) != CODE_FOR_insn_group_barrier) - emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); - - init_insn_group_barriers (); - } - else if (INSN_P (insn)) - { - if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) - init_insn_group_barriers (); - else if (group_barrier_needed (insn)) - { - emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); - init_insn_group_barriers (); - group_barrier_needed (insn); - } - } - } -} - - - -/* Instruction scheduling support. */ - -#define NR_BUNDLES 10 - -/* A list of names of all available bundles. */ - -static const char *bundle_name [NR_BUNDLES] = -{ - ".mii", - ".mmi", - ".mfi", - ".mmf", -#if NR_BUNDLES == 10 - ".bbb", - ".mbb", -#endif - ".mib", - ".mmb", - ".mfb", - ".mlx" -}; - -/* Nonzero if we should insert stop bits into the schedule. */ - -int ia64_final_schedule = 0; - -/* Codes of the corresponding queried units: */ - -static int _0mii_, _0mmi_, _0mfi_, _0mmf_; -static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; - -static int _1mii_, _1mmi_, _1mfi_, _1mmf_; -static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; - -static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; - -/* The following variable value is an insn group barrier. */ - -static rtx dfa_stop_insn; - -/* The following variable value is the last issued insn. */ - -static rtx last_scheduled_insn; - -/* The following variable value is size of the DFA state. */ - -static size_t dfa_state_size; - -/* The following variable value is pointer to a DFA state used as - temporary variable. */ - -static state_t temp_dfa_state = NULL; - -/* The following variable value is DFA state after issuing the last - insn. */ - -static state_t prev_cycle_state = NULL; - -/* The following array element values are TRUE if the corresponding - insn requires to add stop bits before it. */ - -static char *stops_p = NULL; - -/* The following array element values are ZERO for non-speculative - instructions and hold corresponding speculation check number for - speculative instructions. */ -static int *spec_check_no = NULL; - -/* Size of spec_check_no array. */ -static int max_uid = 0; - -/* The following variable is used to set up the mentioned above array. */ - -static int stop_before_p = 0; - -/* The following variable value is length of the arrays `clocks' and - `add_cycles'. */ - -static int clocks_length; - -/* The following array element values are cycles on which the - corresponding insn will be issued. The array is used only for - Itanium1. */ - -static int *clocks; - -/* The following array element values are numbers of cycles should be - added to improve insn scheduling for MM_insns for Itanium1. */ - -static int *add_cycles; - -/* The following variable value is number of data speculations in progress. */ -static int pending_data_specs = 0; - -static rtx ia64_single_set (rtx); -static void ia64_emit_insn_before (rtx, rtx); - -/* Map a bundle number to its pseudo-op. */ - -const char * -get_bundle_name (int b) -{ - return bundle_name[b]; -} - - -/* Return the maximum number of instructions a cpu can issue. */ - -static int -ia64_issue_rate (void) -{ - return 6; -} - -/* Helper function - like single_set, but look inside COND_EXEC. 
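For orientation: ia64_issue_rate above returns 6 because the core can disperse up to two three-slot bundles per clock; the ten names above are the dispersal templates the bundler chooses among, and the stop bit emitted by the barrier machinery is the ;; written inside or between bundles. A schematic bundle, illustrative rather than actual compiler output:

  { .mfi
        ld8   r14 = [r32]        // M slot
        fma.s f6 = f8, f9, f1    // F slot
        add   r15 = r15, r16     // I slot
        ;;
  }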
*/ - -static rtx -ia64_single_set (rtx insn) -{ - rtx x = PATTERN (insn), ret; - if (GET_CODE (x) == COND_EXEC) - x = COND_EXEC_CODE (x); - if (GET_CODE (x) == SET) - return x; - - /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. - Although they are not classical single set, the second set is there just - to protect it from moving past FP-relative stack accesses. */ - switch (recog_memoized (insn)) - { - case CODE_FOR_prologue_allocate_stack: - case CODE_FOR_epilogue_deallocate_stack: - ret = XVECEXP (x, 0, 0); - break; - - default: - ret = single_set_2 (insn, x); - break; - } - - return ret; -} - -/* Adjust the cost of a scheduling dependency. - Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN. - COST is the current cost. */ - -static int -ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost) -{ - enum reg_note dep_type = (enum reg_note) dep_type1; - enum attr_itanium_class dep_class; - enum attr_itanium_class insn_class; - - if (dep_type != REG_DEP_OUTPUT) - return cost; - - insn_class = ia64_safe_itanium_class (insn); - dep_class = ia64_safe_itanium_class (dep_insn); - if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF - || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) - return 0; - - return cost; -} - -/* Like emit_insn_before, but skip cycle_display notes. - ??? When cycle display notes are implemented, update this. */ - -static void -ia64_emit_insn_before (rtx insn, rtx before) -{ - emit_insn_before (insn, before); -} - -/* The following function marks insns who produce addresses for load - and store insns. Such insns will be placed into M slots because it - decrease latency time for Itanium1 (see function - `ia64_produce_address_p' and the DFA descriptions). */ - -static void -ia64_dependencies_evaluation_hook (rtx head, rtx tail) -{ - rtx insn, link, next, next_tail; - - /* Before reload, which_alternative is not set, which means that - ia64_safe_itanium_class will produce wrong results for (at least) - move instructions. */ - if (!reload_completed) - return; - - next_tail = NEXT_INSN (tail); - for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) - if (INSN_P (insn)) - insn->call = 0; - for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) - if (INSN_P (insn) - && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) - { - for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) - { - enum attr_itanium_class c; - - if (REG_NOTE_KIND (link) != REG_DEP_TRUE) - continue; - next = XEXP (link, 0); - c = ia64_safe_itanium_class (next); - if ((c == ITANIUM_CLASS_ST - || c == ITANIUM_CLASS_STF) - && ia64_st_address_bypass_p (insn, next)) - break; - else if ((c == ITANIUM_CLASS_LD - || c == ITANIUM_CLASS_FLD - || c == ITANIUM_CLASS_FLDP) - && ia64_ld_address_bypass_p (insn, next)) - break; - } - insn->call = link != 0; - } -} - -/* We're beginning a new block. Initialize data structures as necessary. */ - -static void -ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED, - int sched_verbose ATTRIBUTE_UNUSED, - int max_ready ATTRIBUTE_UNUSED) -{ -#ifdef ENABLE_CHECKING - rtx insn; - - if (reload_completed) - for (insn = NEXT_INSN (current_sched_info->prev_head); - insn != current_sched_info->next_tail; - insn = NEXT_INSN (insn)) - gcc_assert (!SCHED_GROUP_P (insn)); -#endif - last_scheduled_insn = NULL_RTX; - init_insn_group_barriers (); -} - -/* We're beginning a scheduling pass. Check assertion. 
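-
-   The assertion below checks that no data-speculative loads are still
-   outstanding when a new pass starts, i.e. that every BEGIN_DATA load
-   issued earlier was matched by its check insn.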
*/ - -static void -ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, - int sched_verbose ATTRIBUTE_UNUSED, - int max_ready ATTRIBUTE_UNUSED) -{ - gcc_assert (!pending_data_specs); -} - -/* Scheduling pass is now finished. Free/reset static variable. */ -static void -ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED, - int sched_verbose ATTRIBUTE_UNUSED) -{ - free (spec_check_no); - spec_check_no = 0; - max_uid = 0; -} - -/* We are about to being issuing insns for this clock cycle. - Override the default sort algorithm to better slot instructions. */ - -static int -ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, - int *pn_ready, int clock_var ATTRIBUTE_UNUSED, - int reorder_type) -{ - int n_asms; - int n_ready = *pn_ready; - rtx *e_ready = ready + n_ready; - rtx *insnp; - - if (sched_verbose) - fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); - - if (reorder_type == 0) - { - /* First, move all USEs, CLOBBERs and other crud out of the way. */ - n_asms = 0; - for (insnp = ready; insnp < e_ready; insnp++) - if (insnp < e_ready) - { - rtx insn = *insnp; - enum attr_type t = ia64_safe_type (insn); - if (t == TYPE_UNKNOWN) - { - if (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0) - { - rtx lowest = ready[n_asms]; - ready[n_asms] = insn; - *insnp = lowest; - n_asms++; - } - else - { - rtx highest = ready[n_ready - 1]; - ready[n_ready - 1] = insn; - *insnp = highest; - return 1; - } - } - } - - if (n_asms < n_ready) - { - /* Some normal insns to process. Skip the asms. */ - ready += n_asms; - n_ready -= n_asms; - } - else if (n_ready > 0) - return 1; - } - - if (ia64_final_schedule) - { - int deleted = 0; - int nr_need_stop = 0; - - for (insnp = ready; insnp < e_ready; insnp++) - if (safe_group_barrier_needed (*insnp)) - nr_need_stop++; - - if (reorder_type == 1 && n_ready == nr_need_stop) - return 0; - if (reorder_type == 0) - return 1; - insnp = e_ready; - /* Move down everything that needs a stop bit, preserving - relative order. */ - while (insnp-- > ready + deleted) - while (insnp >= ready + deleted) - { - rtx insn = *insnp; - if (! safe_group_barrier_needed (insn)) - break; - memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); - *ready = insn; - deleted++; - } - n_ready -= deleted; - ready += deleted; - } - - return 1; -} - -/* We are about to being issuing insns for this clock cycle. Override - the default sort algorithm to better slot instructions. */ - -static int -ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, - int clock_var) -{ - return ia64_dfa_sched_reorder (dump, sched_verbose, ready, - pn_ready, clock_var, 0); -} - -/* Like ia64_sched_reorder, but called after issuing each insn. - Override the default sort algorithm to better slot instructions. */ - -static int -ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, - int sched_verbose ATTRIBUTE_UNUSED, rtx *ready, - int *pn_ready, int clock_var) -{ - if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn) - clocks [INSN_UID (last_scheduled_insn)] = clock_var; - return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, - clock_var, 1); -} - -/* We are about to issue INSN. Return the number of insns left on the - ready queue that can be issued this cycle. 
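-
-   Together with the lookahead guard further below, the bookkeeping in
-   this hook implements the "keep the ALAT half-empty" policy: issuing
-   a data-speculative load (DONE_SPEC & BEGIN_DATA) increments
-   pending_data_specs, issuing its check (CHECK_SPEC & BEGIN_DATA)
-   decrements it, and the guard refuses to pick another such load from
-   the ready list while 16 of the 32 ALAT entries may be live.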
*/ - -static int -ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED, - int sched_verbose ATTRIBUTE_UNUSED, - rtx insn ATTRIBUTE_UNUSED, - int can_issue_more ATTRIBUTE_UNUSED) -{ - if (current_sched_info->flags & DO_SPECULATION) - /* Modulo scheduling does not extend h_i_d when emitting - new instructions. Deal with it. */ - { - if (DONE_SPEC (insn) & BEGIN_DATA) - pending_data_specs++; - if (CHECK_SPEC (insn) & BEGIN_DATA) - pending_data_specs--; - } - - last_scheduled_insn = insn; - memcpy (prev_cycle_state, curr_state, dfa_state_size); - if (reload_completed) - { - int needed = group_barrier_needed (insn); - - gcc_assert (!needed); - if (GET_CODE (insn) == CALL_INSN) - init_insn_group_barriers (); - stops_p [INSN_UID (insn)] = stop_before_p; - stop_before_p = 0; - } - return 1; -} - -/* We are choosing insn from the ready queue. Return nonzero if INSN - can be chosen. */ - -static int -ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn) -{ - gcc_assert (insn && INSN_P (insn)); - return ((!reload_completed - || !safe_group_barrier_needed (insn)) - && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)); -} - -/* We are choosing insn from the ready queue. Return nonzero if INSN - can be chosen. */ - -static bool -ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn) -{ - gcc_assert (insn && INSN_P (insn)); - /* Size of ALAT is 32. As far as we perform conservative data speculation, - we keep ALAT half-empty. */ - return (pending_data_specs < 16 - || !(TODO_SPEC (insn) & BEGIN_DATA)); -} - -/* The following variable value is pseudo-insn used by the DFA insn - scheduler to change the DFA state when the simulated clock is - increased. */ - -static rtx dfa_pre_cycle_insn; - -/* We are about to being issuing INSN. Return nonzero if we cannot - issue it on given cycle CLOCK and return zero if we should not sort - the ready queue on the next clock start. */ - -static int -ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock, - int clock, int *sort_p) -{ - int setup_clocks_p = FALSE; - - gcc_assert (insn && INSN_P (insn)); - if ((reload_completed && safe_group_barrier_needed (insn)) - || (last_scheduled_insn - && (GET_CODE (last_scheduled_insn) == CALL_INSN - || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT - || asm_noperands (PATTERN (last_scheduled_insn)) >= 0))) - { - init_insn_group_barriers (); - if (verbose && dump) - fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), - last_clock == clock ? 
" + cycle advance" : ""); - stop_before_p = 1; - if (last_clock == clock) - { - state_transition (curr_state, dfa_stop_insn); - if (TARGET_EARLY_STOP_BITS) - *sort_p = (last_scheduled_insn == NULL_RTX - || GET_CODE (last_scheduled_insn) != CALL_INSN); - else - *sort_p = 0; - return 1; - } - else if (reload_completed) - setup_clocks_p = TRUE; - if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT - || asm_noperands (PATTERN (last_scheduled_insn)) >= 0) - state_reset (curr_state); - else - { - memcpy (curr_state, prev_cycle_state, dfa_state_size); - state_transition (curr_state, dfa_stop_insn); - state_transition (curr_state, dfa_pre_cycle_insn); - state_transition (curr_state, NULL); - } - } - else if (reload_completed) - setup_clocks_p = TRUE; - if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM - && GET_CODE (PATTERN (insn)) != ASM_INPUT - && asm_noperands (PATTERN (insn)) < 0) - { - enum attr_itanium_class c = ia64_safe_itanium_class (insn); - - if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF) - { - rtx link; - int d = -1; - - for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == 0) - { - enum attr_itanium_class dep_class; - rtx dep_insn = XEXP (link, 0); - - dep_class = ia64_safe_itanium_class (dep_insn); - if ((dep_class == ITANIUM_CLASS_MMMUL - || dep_class == ITANIUM_CLASS_MMSHF) - && last_clock - clocks [INSN_UID (dep_insn)] < 4 - && (d < 0 - || last_clock - clocks [INSN_UID (dep_insn)] < d)) - d = last_clock - clocks [INSN_UID (dep_insn)]; - } - if (d >= 0) - add_cycles [INSN_UID (insn)] = 3 - d; - } - } - return 0; -} - -/* Implement targetm.sched.h_i_d_extended hook. - Extend internal data structures. */ -static void -ia64_h_i_d_extended (void) -{ - if (current_sched_info->flags & DO_SPECULATION) - { - int new_max_uid = get_max_uid () + 1; - - spec_check_no = xrecalloc (spec_check_no, new_max_uid, - max_uid, sizeof (*spec_check_no)); - max_uid = new_max_uid; - } - - if (stops_p != NULL) - { - int new_clocks_length = get_max_uid () + 1; - - stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1); - - if (ia64_tune == PROCESSOR_ITANIUM) - { - clocks = xrecalloc (clocks, new_clocks_length, clocks_length, - sizeof (int)); - add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length, - sizeof (int)); - } - - clocks_length = new_clocks_length; - } -} - -/* Constants that help mapping 'enum machine_mode' to int. */ -enum SPEC_MODES - { - SPEC_MODE_INVALID = -1, - SPEC_MODE_FIRST = 0, - SPEC_MODE_FOR_EXTEND_FIRST = 1, - SPEC_MODE_FOR_EXTEND_LAST = 3, - SPEC_MODE_LAST = 8 - }; - -/* Return index of the MODE. */ -static int -ia64_mode_to_int (enum machine_mode mode) -{ - switch (mode) - { - case BImode: return 0; /* SPEC_MODE_FIRST */ - case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */ - case HImode: return 2; - case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */ - case DImode: return 4; - case SFmode: return 5; - case DFmode: return 6; - case XFmode: return 7; - case TImode: - /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not - mentioned in itanium[12].md. Predicate fp_register_operand also - needs to be defined. Bottom line: better disable for now. */ - return SPEC_MODE_INVALID; - default: return SPEC_MODE_INVALID; - } -} - -/* Provide information about speculation capabilities. 
*/ -static void -ia64_set_sched_flags (spec_info_t spec_info) -{ - unsigned int *flags = &(current_sched_info->flags); - - if (*flags & SCHED_RGN - || *flags & SCHED_EBB) - { - int mask = 0; - - if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0) - || (mflag_sched_ar_data_spec && reload_completed)) - { - mask |= BEGIN_DATA; - - if ((mflag_sched_br_in_data_spec && !reload_completed) - || (mflag_sched_ar_in_data_spec && reload_completed)) - mask |= BE_IN_DATA; - } - - if (mflag_sched_control_spec) - { - mask |= BEGIN_CONTROL; - - if (mflag_sched_in_control_spec) - mask |= BE_IN_CONTROL; - } - - gcc_assert (*flags & USE_GLAT); - - if (mask) - { - *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION; - - spec_info->mask = mask; - spec_info->flags = 0; - - if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns) - spec_info->flags |= PREFER_NON_DATA_SPEC; - - if ((mask & CONTROL_SPEC) - && mflag_sched_prefer_non_control_spec_insns) - spec_info->flags |= PREFER_NON_CONTROL_SPEC; - - if (mflag_sched_spec_verbose) - { - if (sched_verbose >= 1) - spec_info->dump = sched_dump; - else - spec_info->dump = stderr; - } - else - spec_info->dump = 0; - - if (mflag_sched_count_spec_in_critical_path) - spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH; - } - } -} - -/* Implement targetm.sched.speculate_insn hook. - Check if the INSN can be TS speculative. - If 'no' - return -1. - If 'yes' - generate speculative pattern in the NEW_PAT and return 1. - If current pattern of the INSN already provides TS speculation, return 0. */ -static int -ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat) -{ - rtx pat, reg, mem, mem_reg; - int mode_no, gen_p = 1; - bool extend_p; - - gcc_assert (!(ts & ~BEGIN_SPEC) && ts); - - pat = PATTERN (insn); - - if (GET_CODE (pat) == COND_EXEC) - pat = COND_EXEC_CODE (pat); - - /* This should be a SET ... */ - if (GET_CODE (pat) != SET) - return -1; - - reg = SET_DEST (pat); - /* ... to the general/fp register ... */ - if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg)))) - return -1; - - /* ... from the mem ... */ - mem = SET_SRC (pat); - - /* ... that can, possibly, be a zero_extend ... */ - if (GET_CODE (mem) == ZERO_EXTEND) - { - mem = XEXP (mem, 0); - extend_p = true; - } - else - extend_p = false; - - /* ... or a speculative load. */ - if (GET_CODE (mem) == UNSPEC) - { - int code; - - code = XINT (mem, 1); - if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA) - return -1; - - if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL)) - || (code == UNSPEC_LDS && !(ts & BEGIN_DATA)) - || code == UNSPEC_LDSA) - gen_p = 0; - - mem = XVECEXP (mem, 0, 0); - gcc_assert (MEM_P (mem)); - } - - /* Source should be a mem ... */ - if (!MEM_P (mem)) - return -1; - - /* ... addressed by a register. */ - mem_reg = XEXP (mem, 0); - if (!REG_P (mem_reg)) - return -1; - - /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND - will always be DImode. */ - mode_no = ia64_mode_to_int (GET_MODE (mem)); - - if (mode_no == SPEC_MODE_INVALID - || (extend_p - && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no - && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))) - return -1; - - extract_insn_cached (insn); - gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]); - - *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p); - - return gen_p; -} - -enum - { - /* Offset to reach ZERO_EXTEND patterns. 
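-
-       With the mode indices above this offset evaluates to 8, so the
-       zero-extending QI/HI/SI loads (mode_no 1 through 3) occupy
-       slots 9 through 11 of each SPEC_N-sized (12-entry) group in the
-       gen_load table below.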
*/ - SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1, - /* Number of patterns for each speculation mode. */ - SPEC_N = (SPEC_MODE_LAST - + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2) - }; - -enum SPEC_GEN_LD_MAP - { - /* Offset to ld.a patterns. */ - SPEC_GEN_A = 0 * SPEC_N, - /* Offset to ld.s patterns. */ - SPEC_GEN_S = 1 * SPEC_N, - /* Offset to ld.sa patterns. */ - SPEC_GEN_SA = 2 * SPEC_N, - /* Offset to ld.sa patterns. For this patterns corresponding ld.c will - mutate to chk.s. */ - SPEC_GEN_SA_FOR_S = 3 * SPEC_N - }; - -/* These offsets are used to get (4 * SPEC_N). */ -enum SPEC_GEN_CHECK_OFFSET - { - SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A, - SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA - }; - -/* If GEN_P is true, calculate the index of needed speculation check and return - speculative pattern for INSN with speculative mode TS, machine mode - MODE_NO and with ZERO_EXTEND (if EXTEND_P is true). - If GEN_P is false, just calculate the index of needed speculation check. */ -static rtx -ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p) -{ - rtx pat, new_pat; - int load_no; - int shift = 0; - - static rtx (* const gen_load[]) (rtx, rtx) = { - gen_movbi_advanced, - gen_movqi_advanced, - gen_movhi_advanced, - gen_movsi_advanced, - gen_movdi_advanced, - gen_movsf_advanced, - gen_movdf_advanced, - gen_movxf_advanced, - gen_movti_advanced, - gen_zero_extendqidi2_advanced, - gen_zero_extendhidi2_advanced, - gen_zero_extendsidi2_advanced, - - gen_movbi_speculative, - gen_movqi_speculative, - gen_movhi_speculative, - gen_movsi_speculative, - gen_movdi_speculative, - gen_movsf_speculative, - gen_movdf_speculative, - gen_movxf_speculative, - gen_movti_speculative, - gen_zero_extendqidi2_speculative, - gen_zero_extendhidi2_speculative, - gen_zero_extendsidi2_speculative, - - gen_movbi_speculative_advanced, - gen_movqi_speculative_advanced, - gen_movhi_speculative_advanced, - gen_movsi_speculative_advanced, - gen_movdi_speculative_advanced, - gen_movsf_speculative_advanced, - gen_movdf_speculative_advanced, - gen_movxf_speculative_advanced, - gen_movti_speculative_advanced, - gen_zero_extendqidi2_speculative_advanced, - gen_zero_extendhidi2_speculative_advanced, - gen_zero_extendsidi2_speculative_advanced, - - gen_movbi_speculative_advanced, - gen_movqi_speculative_advanced, - gen_movhi_speculative_advanced, - gen_movsi_speculative_advanced, - gen_movdi_speculative_advanced, - gen_movsf_speculative_advanced, - gen_movdf_speculative_advanced, - gen_movxf_speculative_advanced, - gen_movti_speculative_advanced, - gen_zero_extendqidi2_speculative_advanced, - gen_zero_extendhidi2_speculative_advanced, - gen_zero_extendsidi2_speculative_advanced - }; - - load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no; - - if (ts & BEGIN_DATA) - { - /* We don't need recovery because even if this is ld.sa - ALAT entry will be allocated only if NAT bit is set to zero. - So it is enough to use ld.c here. */ - - if (ts & BEGIN_CONTROL) - { - load_no += SPEC_GEN_SA; - - if (!mflag_sched_ldc) - shift = SPEC_GEN_CHKA_FOR_SA_OFFSET; - } - else - { - load_no += SPEC_GEN_A; - - if (!mflag_sched_ldc) - shift = SPEC_GEN_CHKA_FOR_A_OFFSET; - } - } - else if (ts & BEGIN_CONTROL) - { - /* ld.sa can be used instead of ld.s to avoid basic block splitting. 
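-
-         As a worked example: with SPEC_N == 12, a purely
-         control-speculative DImode load (mode_no == 4, no extension)
-         gets load_no = 4 + SPEC_GEN_S, i.e. gen_movdi_speculative (an
-         ld.s checked by chk.s), when mflag_control_ldc is clear; when
-         it is set, load_no = 4 + SPEC_GEN_SA_FOR_S selects
-         gen_movdi_speculative_advanced (an ld.sa whose check later
-         mutates to chk.s).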
*/ - if (!mflag_control_ldc) - load_no += SPEC_GEN_S; - else - { - gcc_assert (mflag_sched_ldc); - load_no += SPEC_GEN_SA_FOR_S; - } - } - else - gcc_unreachable (); - - /* Set the desired check index. We add '1', because zero element in this - array means, that instruction with such uid is non-speculative. */ - spec_check_no[INSN_UID (insn)] = load_no + shift + 1; - - if (!gen_p) - return 0; - - new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]), - copy_rtx (recog_data.operand[1])); - - pat = PATTERN (insn); - if (GET_CODE (pat) == COND_EXEC) - new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx - (COND_EXEC_TEST (pat)), new_pat); - - return new_pat; -} - -/* Offset to branchy checks. */ -enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N }; - -/* Return nonzero, if INSN needs branchy recovery check. */ -static bool -ia64_needs_block_p (rtx insn) -{ - int check_no; - - check_no = spec_check_no[INSN_UID(insn)] - 1; - gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET); - - return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N) - || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N)); -} - -/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN. - If (LABEL != 0 || MUTATE_P), generate branchy recovery check. - Otherwise, generate a simple check. */ -static rtx -ia64_gen_check (rtx insn, rtx label, bool mutate_p) -{ - rtx op1, pat, check_pat; - - static rtx (* const gen_check[]) (rtx, rtx) = { - gen_movbi_clr, - gen_movqi_clr, - gen_movhi_clr, - gen_movsi_clr, - gen_movdi_clr, - gen_movsf_clr, - gen_movdf_clr, - gen_movxf_clr, - gen_movti_clr, - gen_zero_extendqidi2_clr, - gen_zero_extendhidi2_clr, - gen_zero_extendsidi2_clr, - - gen_speculation_check_bi, - gen_speculation_check_qi, - gen_speculation_check_hi, - gen_speculation_check_si, - gen_speculation_check_di, - gen_speculation_check_sf, - gen_speculation_check_df, - gen_speculation_check_xf, - gen_speculation_check_ti, - gen_speculation_check_di, - gen_speculation_check_di, - gen_speculation_check_di, - - gen_movbi_clr, - gen_movqi_clr, - gen_movhi_clr, - gen_movsi_clr, - gen_movdi_clr, - gen_movsf_clr, - gen_movdf_clr, - gen_movxf_clr, - gen_movti_clr, - gen_zero_extendqidi2_clr, - gen_zero_extendhidi2_clr, - gen_zero_extendsidi2_clr, - - gen_movbi_clr, - gen_movqi_clr, - gen_movhi_clr, - gen_movsi_clr, - gen_movdi_clr, - gen_movsf_clr, - gen_movdf_clr, - gen_movxf_clr, - gen_movti_clr, - gen_zero_extendqidi2_clr, - gen_zero_extendhidi2_clr, - gen_zero_extendsidi2_clr, - - gen_advanced_load_check_clr_bi, - gen_advanced_load_check_clr_qi, - gen_advanced_load_check_clr_hi, - gen_advanced_load_check_clr_si, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_sf, - gen_advanced_load_check_clr_df, - gen_advanced_load_check_clr_xf, - gen_advanced_load_check_clr_ti, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_di, - - /* Following checks are generated during mutation. 
*/ - gen_advanced_load_check_clr_bi, - gen_advanced_load_check_clr_qi, - gen_advanced_load_check_clr_hi, - gen_advanced_load_check_clr_si, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_sf, - gen_advanced_load_check_clr_df, - gen_advanced_load_check_clr_xf, - gen_advanced_load_check_clr_ti, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_di, - - 0,0,0,0,0,0,0,0,0,0,0,0, - - gen_advanced_load_check_clr_bi, - gen_advanced_load_check_clr_qi, - gen_advanced_load_check_clr_hi, - gen_advanced_load_check_clr_si, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_sf, - gen_advanced_load_check_clr_df, - gen_advanced_load_check_clr_xf, - gen_advanced_load_check_clr_ti, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_di, - gen_advanced_load_check_clr_di, - - gen_speculation_check_bi, - gen_speculation_check_qi, - gen_speculation_check_hi, - gen_speculation_check_si, - gen_speculation_check_di, - gen_speculation_check_sf, - gen_speculation_check_df, - gen_speculation_check_xf, - gen_speculation_check_ti, - gen_speculation_check_di, - gen_speculation_check_di, - gen_speculation_check_di - }; - - extract_insn_cached (insn); - - if (label) - { - gcc_assert (mutate_p || ia64_needs_block_p (insn)); - op1 = label; - } - else - { - gcc_assert (!mutate_p && !ia64_needs_block_p (insn)); - op1 = copy_rtx (recog_data.operand[1]); - } - - if (mutate_p) - /* INSN is ld.c. - Find the speculation check number by searching for original - speculative load in the RESOLVED_DEPS list of INSN. - As long as patterns are unique for each instruction, this can be - accomplished by matching ORIG_PAT fields. */ - { - rtx link; - int check_no = 0; - rtx orig_pat = ORIG_PAT (insn); - - for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1)) - { - rtx x = XEXP (link, 0); - - if (ORIG_PAT (x) == orig_pat) - check_no = spec_check_no[INSN_UID (x)]; - } - gcc_assert (check_no); - - spec_check_no[INSN_UID (insn)] = (check_no - + SPEC_GEN_CHECK_MUTATION_OFFSET); - } - - check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1] - (copy_rtx (recog_data.operand[0]), op1)); - - pat = PATTERN (insn); - if (GET_CODE (pat) == COND_EXEC) - check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), - check_pat); - - return check_pat; -} - -/* Return nonzero, if X is branchy recovery check. */ -static int -ia64_spec_check_p (rtx x) -{ - x = PATTERN (x); - if (GET_CODE (x) == COND_EXEC) - x = COND_EXEC_CODE (x); - if (GET_CODE (x) == SET) - return ia64_spec_check_src_p (SET_SRC (x)); - return 0; -} - -/* Return nonzero, if SRC belongs to recovery check. */ -static int -ia64_spec_check_src_p (rtx src) -{ - if (GET_CODE (src) == IF_THEN_ELSE) - { - rtx t; - - t = XEXP (src, 0); - if (GET_CODE (t) == NE) - { - t = XEXP (t, 0); - - if (GET_CODE (t) == UNSPEC) - { - int code; - - code = XINT (t, 1); - - if (code == UNSPEC_CHKACLR - || code == UNSPEC_CHKS - || code == UNSPEC_LDCCLR) - { - gcc_assert (code != 0); - return code; - } - } - } - } - return 0; -} - - -/* The following page contains abstract data `bundle states' which are - used for bundling insns (inserting nops and template generation). */ - -/* The following describes state of insn bundling. 
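-
-   As a sketch of the search this structure supports: the forward pass
-   explores, for the n-th important insn I, every surviving state with
-   every allowed nop padding, roughly
-
-     for each state S in index_to_bundle_states[n - 1]
-       for nops in { 2, 1, 0 }
-         issue_nops_and_insn (S, nops, I, ...);
-
-   and states reaching the same automaton state at the same insn_num
-   are merged by insert_bundle_state, which keeps the better one by
-   cost, then by accumulated_insns_num, then by branch_deviation.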
*/
-
-struct bundle_state
-{
-  /* Unique bundle state number to identify them in the debugging
-     output  */
-  int unique_num;
-  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
-  /* number of nops before and after the insn  */
-  short before_nops_num, after_nops_num;
-  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
-                   insn)  */
-  int cost;     /* cost of the state in cycles  */
-  int accumulated_insns_num; /* number of all previous insns including
-                                nops.  L is considered as 2 insns  */
-  int branch_deviation; /* deviation of previous branches from 3rd slots  */
-  struct bundle_state *next;  /* next state with the same insn_num  */
-  struct bundle_state *originator; /* originator (previous insn state)  */
-  /* All bundle states are in the following chain.  */
-  struct bundle_state *allocated_states_chain;
-  /* The DFA State after issuing the insn and the nops.  */
-  state_t dfa_state;
-};
-
-/* The following maps an insn number to the corresponding bundle state.  */
-
-static struct bundle_state **index_to_bundle_states;
-
-/* The unique number of the next bundle state.  */
-
-static int bundle_states_num;
-
-/* All allocated bundle states are in the following chain.  */
-
-static struct bundle_state *allocated_bundle_states_chain;
-
-/* All allocated but not used bundle states are in the following
-   chain.  */
-
-static struct bundle_state *free_bundle_state_chain;
-
-
-/* The following function returns a free bundle state.  */
-
-static struct bundle_state *
-get_free_bundle_state (void)
-{
-  struct bundle_state *result;
-
-  if (free_bundle_state_chain != NULL)
-    {
-      result = free_bundle_state_chain;
-      free_bundle_state_chain = result->next;
-    }
-  else
-    {
-      result = xmalloc (sizeof (struct bundle_state));
-      result->dfa_state = xmalloc (dfa_state_size);
-      result->allocated_states_chain = allocated_bundle_states_chain;
-      allocated_bundle_states_chain = result;
-    }
-  result->unique_num = bundle_states_num++;
-  return result;
-
-}
-
-/* The following function frees the given bundle state.  */
-
-static void
-free_bundle_state (struct bundle_state *state)
-{
-  state->next = free_bundle_state_chain;
-  free_bundle_state_chain = state;
-}
-
-/* Start work with abstract data `bundle states'.  */
-
-static void
-initiate_bundle_states (void)
-{
-  bundle_states_num = 0;
-  free_bundle_state_chain = NULL;
-  allocated_bundle_states_chain = NULL;
-}
-
-/* Finish work with abstract data `bundle states'.  */
-
-static void
-finish_bundle_states (void)
-{
-  struct bundle_state *curr_state, *next_state;
-
-  for (curr_state = allocated_bundle_states_chain;
-       curr_state != NULL;
-       curr_state = next_state)
-    {
-      next_state = curr_state->allocated_states_chain;
-      free (curr_state->dfa_state);
-      free (curr_state);
-    }
-}
-
-/* Hash table of the bundle states.  The key is dfa_state and insn_num
-   of the bundle states.  */
-
-static htab_t bundle_state_table;
-
-/* The function returns the hash of BUNDLE_STATE.  */
-
-static unsigned
-bundle_state_hash (const void *bundle_state)
-{
-  const struct bundle_state *state = (struct bundle_state *) bundle_state;
-  unsigned result, i;
-
-  for (result = i = 0; i < dfa_state_size; i++)
-    result += (((unsigned char *) state->dfa_state) [i]
-               << ((i % CHAR_BIT) * 3 + CHAR_BIT));
-  return result + state->insn_num;
-}
-
-/* The function returns nonzero if the bundle state keys are equal.
*/
-
-static int
-bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
-{
-  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
-  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
-
-  return (state1->insn_num == state2->insn_num
-          && memcmp (state1->dfa_state, state2->dfa_state,
-                     dfa_state_size) == 0);
-}
-
-/* The function inserts the BUNDLE_STATE into the hash table.  The
-   function returns nonzero if the bundle has been inserted into the
-   table.  The table contains the best bundle state with the given key.  */
-
-static int
-insert_bundle_state (struct bundle_state *bundle_state)
-{
-  void **entry_ptr;
-
-  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
-  if (*entry_ptr == NULL)
-    {
-      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
-      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
-      *entry_ptr = (void *) bundle_state;
-      return TRUE;
-    }
-  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
-           || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
-               && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
-                   > bundle_state->accumulated_insns_num
-                   || (((struct bundle_state *)
-                        *entry_ptr)->accumulated_insns_num
-                       == bundle_state->accumulated_insns_num
-                       && ((struct bundle_state *)
-                           *entry_ptr)->branch_deviation
-                       > bundle_state->branch_deviation))))
-
-    {
-      struct bundle_state temp;
-
-      temp = *(struct bundle_state *) *entry_ptr;
-      *(struct bundle_state *) *entry_ptr = *bundle_state;
-      ((struct bundle_state *) *entry_ptr)->next = temp.next;
-      *bundle_state = temp;
-    }
-  return FALSE;
-}
-
-/* Start work with the hash table.  */
-
-static void
-initiate_bundle_state_table (void)
-{
-  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
-                                    (htab_del) 0);
-}
-
-/* Finish work with the hash table.  */
-
-static void
-finish_bundle_state_table (void)
-{
-  htab_delete (bundle_state_table);
-}
-
-
-/* The following variable is an insn `nop' used to check bundle states
-   with different numbers of inserted nops.  */
-
-static rtx ia64_nop;
-
-/* The following function tries to issue NOPS_NUM nops for the current
-   state without advancing the processor cycle.  If this fails, the
-   function returns FALSE and frees the current state.  */
-
-static int
-try_issue_nops (struct bundle_state *curr_state, int nops_num)
-{
-  int i;
-
-  for (i = 0; i < nops_num; i++)
-    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
-      {
-        free_bundle_state (curr_state);
-        return FALSE;
-      }
-  return TRUE;
-}
-
-/* The following function tries to issue INSN for the current
-   state without advancing the processor cycle.  If this fails, the
-   function returns FALSE and frees the current state.  */
-
-static int
-try_issue_insn (struct bundle_state *curr_state, rtx insn)
-{
-  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
-    {
-      free_bundle_state (curr_state);
-      return FALSE;
-    }
-  return TRUE;
-}
-
-/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
-   starting with ORIGINATOR without advancing the processor cycle.  If
-   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
-   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
-   bundle.  If successful, the function creates a new bundle state and
-   inserts it into the hash table and into `index_to_bundle_states'.
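-
-   Note for the slot accounting below: an L-type insn consumes two
-   slots (accumulated_insns_num is bumped twice), an asm is always
-   given a bundle of its own by padding with nops after it, and an
-   insn carrying TImode starts a new simulated cycle, which costs one
-   (curr_state->cost++).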
*/ - -static void -issue_nops_and_insn (struct bundle_state *originator, int before_nops_num, - rtx insn, int try_bundle_end_p, int only_bundle_end_p) -{ - struct bundle_state *curr_state; - - curr_state = get_free_bundle_state (); - memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); - curr_state->insn = insn; - curr_state->insn_num = originator->insn_num + 1; - curr_state->cost = originator->cost; - curr_state->originator = originator; - curr_state->before_nops_num = before_nops_num; - curr_state->after_nops_num = 0; - curr_state->accumulated_insns_num - = originator->accumulated_insns_num + before_nops_num; - curr_state->branch_deviation = originator->branch_deviation; - gcc_assert (insn); - if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) - { - gcc_assert (GET_MODE (insn) != TImode); - if (!try_issue_nops (curr_state, before_nops_num)) - return; - if (!try_issue_insn (curr_state, insn)) - return; - memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); - if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 - && curr_state->accumulated_insns_num % 3 != 0) - { - free_bundle_state (curr_state); - return; - } - } - else if (GET_MODE (insn) != TImode) - { - if (!try_issue_nops (curr_state, before_nops_num)) - return; - if (!try_issue_insn (curr_state, insn)) - return; - curr_state->accumulated_insns_num++; - gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT - && asm_noperands (PATTERN (insn)) < 0); - - if (ia64_safe_type (insn) == TYPE_L) - curr_state->accumulated_insns_num++; - } - else - { - /* If this is an insn that must be first in a group, then don't allow - nops to be emitted before it. Currently, alloc is the only such - supported instruction. */ - /* ??? The bundling automatons should handle this for us, but they do - not yet have support for the first_insn attribute. */ - if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES) - { - free_bundle_state (curr_state); - return; - } - - state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); - state_transition (curr_state->dfa_state, NULL); - curr_state->cost++; - if (!try_issue_nops (curr_state, before_nops_num)) - return; - if (!try_issue_insn (curr_state, insn)) - return; - curr_state->accumulated_insns_num++; - if (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0) - { - /* Finish bundle containing asm insn. 
*/
-          curr_state->after_nops_num
-            = 3 - curr_state->accumulated_insns_num % 3;
-          curr_state->accumulated_insns_num
-            += 3 - curr_state->accumulated_insns_num % 3;
-        }
-      else if (ia64_safe_type (insn) == TYPE_L)
-        curr_state->accumulated_insns_num++;
-    }
-  if (ia64_safe_type (insn) == TYPE_B)
-    curr_state->branch_deviation
-      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
-  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
-    {
-      if (!only_bundle_end_p && insert_bundle_state (curr_state))
-        {
-          state_t dfa_state;
-          struct bundle_state *curr_state1;
-          struct bundle_state *allocated_states_chain;
-
-          curr_state1 = get_free_bundle_state ();
-          dfa_state = curr_state1->dfa_state;
-          allocated_states_chain = curr_state1->allocated_states_chain;
-          *curr_state1 = *curr_state;
-          curr_state1->dfa_state = dfa_state;
-          curr_state1->allocated_states_chain = allocated_states_chain;
-          memcpy (curr_state1->dfa_state, curr_state->dfa_state,
-                  dfa_state_size);
-          curr_state = curr_state1;
-        }
-      if (!try_issue_nops (curr_state,
-                           3 - curr_state->accumulated_insns_num % 3))
-        return;
-      curr_state->after_nops_num
-        = 3 - curr_state->accumulated_insns_num % 3;
-      curr_state->accumulated_insns_num
-        += 3 - curr_state->accumulated_insns_num % 3;
-    }
-  if (!insert_bundle_state (curr_state))
-    free_bundle_state (curr_state);
-  return;
-}
-
-/* The following function returns the position in the two-bundle window
-   for the given STATE.  */
-
-static int
-get_max_pos (state_t state)
-{
-  if (cpu_unit_reservation_p (state, pos_6))
-    return 6;
-  else if (cpu_unit_reservation_p (state, pos_5))
-    return 5;
-  else if (cpu_unit_reservation_p (state, pos_4))
-    return 4;
-  else if (cpu_unit_reservation_p (state, pos_3))
-    return 3;
-  else if (cpu_unit_reservation_p (state, pos_2))
-    return 2;
-  else if (cpu_unit_reservation_p (state, pos_1))
-    return 1;
-  else
-    return 0;
-}
-
-/* The function returns the code of a possible template for the given
-   position and state.  The function should be called only with
-   position values of 3 or 6.  We avoid generating F NOPs by putting
-   templates containing F insns at the end of the template search,
-   because of an undocumented anomaly in McKinley-derived cores which
-   can cause stalls if an F-unit insn (including a NOP) is issued
-   within a six-cycle window after reading certain application
-   registers (such as ar.bsp).  Furthermore, power considerations also
-   argue against the use of F-unit instructions unless they're really
-   needed.
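-
-   The codes returned below index the bundle_name array defined
-   earlier, so
-
-     0 = .mii   1 = .mmi   2 = .mfi   3 = .mmf   4 = .bbb
-     5 = .mbb   6 = .mib   7 = .mmb   8 = .mfb   9 = .mlx
-
-   and the reservation tests are ordered so that all F-free templates
-   (.mmi, .mii, .mmb, .mib, .mbb, .bbb) are tried before the F-bearing
-   ones (.mmf, .mfi, .mfb) and .mlx.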
*/
-
-static int
-get_template (state_t state, int pos)
-{
-  switch (pos)
-    {
-    case 3:
-      if (cpu_unit_reservation_p (state, _0mmi_))
-        return 1;
-      else if (cpu_unit_reservation_p (state, _0mii_))
-        return 0;
-      else if (cpu_unit_reservation_p (state, _0mmb_))
-        return 7;
-      else if (cpu_unit_reservation_p (state, _0mib_))
-        return 6;
-      else if (cpu_unit_reservation_p (state, _0mbb_))
-        return 5;
-      else if (cpu_unit_reservation_p (state, _0bbb_))
-        return 4;
-      else if (cpu_unit_reservation_p (state, _0mmf_))
-        return 3;
-      else if (cpu_unit_reservation_p (state, _0mfi_))
-        return 2;
-      else if (cpu_unit_reservation_p (state, _0mfb_))
-        return 8;
-      else if (cpu_unit_reservation_p (state, _0mlx_))
-        return 9;
-      else
-        gcc_unreachable ();
-    case 6:
-      if (cpu_unit_reservation_p (state, _1mmi_))
-        return 1;
-      else if (cpu_unit_reservation_p (state, _1mii_))
-        return 0;
-      else if (cpu_unit_reservation_p (state, _1mmb_))
-        return 7;
-      else if (cpu_unit_reservation_p (state, _1mib_))
-        return 6;
-      else if (cpu_unit_reservation_p (state, _1mbb_))
-        return 5;
-      else if (cpu_unit_reservation_p (state, _1bbb_))
-        return 4;
-      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
-        return 3;
-      else if (cpu_unit_reservation_p (state, _1mfi_))
-        return 2;
-      else if (cpu_unit_reservation_p (state, _1mfb_))
-        return 8;
-      else if (cpu_unit_reservation_p (state, _1mlx_))
-        return 9;
-      else
-        gcc_unreachable ();
-    default:
-      gcc_unreachable ();
-    }
-}
-
-/* The following function returns the first insn important for insn
-   bundling in the range from INSN up to TAIL.  */
-
-static rtx
-get_next_important_insn (rtx insn, rtx tail)
-{
-  for (; insn && insn != tail; insn = NEXT_INSN (insn))
-    if (INSN_P (insn)
-        && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
-        && GET_CODE (PATTERN (insn)) != USE
-        && GET_CODE (PATTERN (insn)) != CLOBBER)
-      return insn;
-  return NULL_RTX;
-}
-
-/* Add a bundle selector TEMPLATE0 before INSN.  */
-
-static void
-ia64_add_bundle_selector_before (int template0, rtx insn)
-{
-  rtx b = gen_bundle_selector (GEN_INT (template0));
-
-  ia64_emit_insn_before (b, insn);
-#if NR_BUNDLES == 10
-  if ((template0 == 4 || template0 == 5)
-      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
-    {
-      int i;
-      rtx note = NULL_RTX;
-
-      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
-         first or second slot.  If it is and has REG_EH_NOTE set, copy it
-         to following nops, as br.call sets rp to the address of following
-         bundle and therefore an EH region end must be on a bundle
-         boundary.  */
-      insn = PREV_INSN (insn);
-      for (i = 0; i < 3; i++)
-        {
-          do
-            insn = next_active_insn (insn);
-          while (GET_CODE (insn) == INSN
-                 && get_attr_empty (insn) == EMPTY_YES);
-          if (GET_CODE (insn) == CALL_INSN)
-            note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
-          else if (note)
-            {
-              int code;
-
-              gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
-                          || code == CODE_FOR_nop_b);
-              if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
-                note = NULL_RTX;
-              else
-                REG_NOTES (insn)
-                  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
-                                       REG_NOTES (insn));
-            }
-        }
-    }
-#endif
-}
-
-/* The following function does insn bundling.  Bundling means
-   inserting templates and nop insns to fit insn groups into permitted
-   templates.  Instruction scheduling uses an NDFA (non-deterministic
-   finite automaton) encoding information about the templates and the
-   inserted nops.  The nondeterminism of the automaton permits following
-   all possible insn sequences very quickly.
-
-   Unfortunately it is not possible to get information about inserting
-   nop insns and used templates from the automaton states.  The
-   automaton only says that we can issue an insn, possibly inserting
-   some nops before it and using some template.  Therefore insn
-   bundling in this function is implemented by using a DFA
-   (deterministic finite automaton).  We follow all possible insn
-   sequences by inserting 0-2 nops (that is what the NDFA describes for
-   insn scheduling) before/after each insn being bundled.  We know the
-   start of the simulated processor cycle from insn scheduling (an insn
-   starting a new cycle has TImode).
-
-   A simple implementation of insn bundling would create an enormous
-   number of possible insn sequences satisfying the information about
-   new cycle ticks taken from the insn scheduling.  To make the
-   algorithm practical we use dynamic programming.  Each decision
-   (about inserting nops and implicitly about previous decisions) is
-   described by the structure bundle_state (see above).  If we generate
-   the same bundle state (the key is the automaton state after issuing
-   the insns and nops for it), we reuse the already generated one.  As
-   a consequence we reject some decisions which cannot improve the
-   solution and reduce the memory used by the algorithm.
-
-   When we reach the end of an EBB (extended basic block), we choose
-   the best sequence and then, moving back through the EBB, insert
-   templates for the best alternative.  The templates are taken by
-   querying the automaton state for each insn in the chosen bundle
-   states.
-
-   So the algorithm makes two (forward and backward) passes through the
-   EBB.  There is an additional forward pass through the EBB for the
-   Itanium1 processor.  This pass inserts more nops to make the
-   dependency between a producer insn and MMMUL/MMSHF at least 4 cycles
-   long.  */
-
-static void
-bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
-{
-  struct bundle_state *curr_state, *next_state, *best_state;
-  rtx insn, next_insn;
-  int insn_num;
-  int i, bundle_end_p, only_bundle_end_p, asm_p;
-  int pos = 0, max_pos, template0, template1;
-  rtx b;
-  rtx nop;
-  enum attr_type type;
-
-  insn_num = 0;
-  /* Count insns in the EBB.  */
-  for (insn = NEXT_INSN (prev_head_insn);
-       insn && insn != tail;
-       insn = NEXT_INSN (insn))
-    if (INSN_P (insn))
-      insn_num++;
-  if (insn_num == 0)
-    return;
-  bundling_p = 1;
-  dfa_clean_insn_cache ();
-  initiate_bundle_state_table ();
-  index_to_bundle_states = xmalloc ((insn_num + 2)
-                                    * sizeof (struct bundle_state *));
-  /* First (forward) pass -- generation of bundle states.  */
-  curr_state = get_free_bundle_state ();
-  curr_state->insn = NULL;
-  curr_state->before_nops_num = 0;
-  curr_state->after_nops_num = 0;
-  curr_state->insn_num = 0;
-  curr_state->cost = 0;
-  curr_state->accumulated_insns_num = 0;
-  curr_state->branch_deviation = 0;
-  curr_state->next = NULL;
-  curr_state->originator = NULL;
-  state_reset (curr_state->dfa_state);
-  index_to_bundle_states [0] = curr_state;
-  insn_num = 0;
-  /* Shift the cycle mark if it is put on an insn which could be ignored.
*/ - for (insn = NEXT_INSN (prev_head_insn); - insn != tail; - insn = NEXT_INSN (insn)) - if (INSN_P (insn) - && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE - || GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (insn)) == CLOBBER) - && GET_MODE (insn) == TImode) - { - PUT_MODE (insn, VOIDmode); - for (next_insn = NEXT_INSN (insn); - next_insn != tail; - next_insn = NEXT_INSN (next_insn)) - if (INSN_P (next_insn) - && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE - && GET_CODE (PATTERN (next_insn)) != USE - && GET_CODE (PATTERN (next_insn)) != CLOBBER) - { - PUT_MODE (next_insn, TImode); - break; - } - } - /* Forward pass: generation of bundle states. */ - for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); - insn != NULL_RTX; - insn = next_insn) - { - gcc_assert (INSN_P (insn) - && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER); - type = ia64_safe_type (insn); - next_insn = get_next_important_insn (NEXT_INSN (insn), tail); - insn_num++; - index_to_bundle_states [insn_num] = NULL; - for (curr_state = index_to_bundle_states [insn_num - 1]; - curr_state != NULL; - curr_state = next_state) - { - pos = curr_state->accumulated_insns_num % 3; - next_state = curr_state->next; - /* We must fill up the current bundle in order to start a - subsequent asm insn in a new bundle. Asm insn is always - placed in a separate bundle. */ - only_bundle_end_p - = (next_insn != NULL_RTX - && INSN_CODE (insn) == CODE_FOR_insn_group_barrier - && ia64_safe_type (next_insn) == TYPE_UNKNOWN); - /* We may fill up the current bundle if it is the cycle end - without a group barrier. */ - bundle_end_p - = (only_bundle_end_p || next_insn == NULL_RTX - || (GET_MODE (next_insn) == TImode - && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); - if (type == TYPE_F || type == TYPE_B || type == TYPE_L - || type == TYPE_S - /* We need to insert 2 nops for cases like M_MII. To - guarantee issuing all insns on the same cycle for - Itanium 1, we need to issue 2 nops after the first M - insn (MnnMII where n is a nop insn). */ - || ((type == TYPE_M || type == TYPE_A) - && ia64_tune == PROCESSOR_ITANIUM - && !bundle_end_p && pos == 1)) - issue_nops_and_insn (curr_state, 2, insn, bundle_end_p, - only_bundle_end_p); - issue_nops_and_insn (curr_state, 1, insn, bundle_end_p, - only_bundle_end_p); - issue_nops_and_insn (curr_state, 0, insn, bundle_end_p, - only_bundle_end_p); - } - gcc_assert (index_to_bundle_states [insn_num]); - for (curr_state = index_to_bundle_states [insn_num]; - curr_state != NULL; - curr_state = curr_state->next) - if (verbose >= 2 && dump) - { - /* This structure is taken from generated code of the - pipeline hazard recognizer (see file insn-attrtab.c). - Please don't forget to change the structure if a new - automaton is added to .md file. */ - struct DFA_chip - { - unsigned short one_automaton_state; - unsigned short oneb_automaton_state; - unsigned short two_automaton_state; - unsigned short twob_automaton_state; - }; - - fprintf - (dump, - "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n", - curr_state->unique_num, - (curr_state->originator == NULL - ? -1 : curr_state->originator->unique_num), - curr_state->cost, - curr_state->before_nops_num, curr_state->after_nops_num, - curr_state->accumulated_insns_num, curr_state->branch_deviation, - (ia64_tune == PROCESSOR_ITANIUM - ? 
((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state - : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state), - INSN_UID (insn)); - } - } - - /* We should find a solution because the 2nd insn scheduling has - found one. */ - gcc_assert (index_to_bundle_states [insn_num]); - /* Find a state corresponding to the best insn sequence. */ - best_state = NULL; - for (curr_state = index_to_bundle_states [insn_num]; - curr_state != NULL; - curr_state = curr_state->next) - /* We are just looking at the states with fully filled up last - bundle. The first we prefer insn sequences with minimal cost - then with minimal inserted nops and finally with branch insns - placed in the 3rd slots. */ - if (curr_state->accumulated_insns_num % 3 == 0 - && (best_state == NULL || best_state->cost > curr_state->cost - || (best_state->cost == curr_state->cost - && (curr_state->accumulated_insns_num - < best_state->accumulated_insns_num - || (curr_state->accumulated_insns_num - == best_state->accumulated_insns_num - && curr_state->branch_deviation - < best_state->branch_deviation))))) - best_state = curr_state; - /* Second (backward) pass: adding nops and templates. */ - insn_num = best_state->before_nops_num; - template0 = template1 = -1; - for (curr_state = best_state; - curr_state->originator != NULL; - curr_state = curr_state->originator) - { - insn = curr_state->insn; - asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0); - insn_num++; - if (verbose >= 2 && dump) - { - struct DFA_chip - { - unsigned short one_automaton_state; - unsigned short oneb_automaton_state; - unsigned short two_automaton_state; - unsigned short twob_automaton_state; - }; - - fprintf - (dump, - "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n", - curr_state->unique_num, - (curr_state->originator == NULL - ? -1 : curr_state->originator->unique_num), - curr_state->cost, - curr_state->before_nops_num, curr_state->after_nops_num, - curr_state->accumulated_insns_num, curr_state->branch_deviation, - (ia64_tune == PROCESSOR_ITANIUM - ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state - : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state), - INSN_UID (insn)); - } - /* Find the position in the current bundle window. The window can - contain at most two bundles. Two bundle window means that - the processor will make two bundle rotation. */ - max_pos = get_max_pos (curr_state->dfa_state); - if (max_pos == 6 - /* The following (negative template number) means that the - processor did one bundle rotation. */ - || (max_pos == 3 && template0 < 0)) - { - /* We are at the end of the window -- find template(s) for - its bundle(s). */ - pos = max_pos; - if (max_pos == 3) - template0 = get_template (curr_state->dfa_state, 3); - else - { - template1 = get_template (curr_state->dfa_state, 3); - template0 = get_template (curr_state->dfa_state, 6); - } - } - if (max_pos > 3 && template1 < 0) - /* It may happen when we have the stop inside a bundle. */ - { - gcc_assert (pos <= 3); - template1 = get_template (curr_state->dfa_state, 3); - pos += 3; - } - if (!asm_p) - /* Emit nops after the current insn. */ - for (i = 0; i < curr_state->after_nops_num; i++) - { - nop = gen_nop (); - emit_insn_after (nop, insn); - pos--; - gcc_assert (pos >= 0); - if (pos % 3 == 0) - { - /* We are at the start of a bundle: emit the template - (it should be defined). 
*/ - gcc_assert (template0 >= 0); - ia64_add_bundle_selector_before (template0, nop); - /* If we have two bundle window, we make one bundle - rotation. Otherwise template0 will be undefined - (negative value). */ - template0 = template1; - template1 = -1; - } - } - /* Move the position backward in the window. Group barrier has - no slot. Asm insn takes all bundle. */ - if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier - && GET_CODE (PATTERN (insn)) != ASM_INPUT - && asm_noperands (PATTERN (insn)) < 0) - pos--; - /* Long insn takes 2 slots. */ - if (ia64_safe_type (insn) == TYPE_L) - pos--; - gcc_assert (pos >= 0); - if (pos % 3 == 0 - && INSN_CODE (insn) != CODE_FOR_insn_group_barrier - && GET_CODE (PATTERN (insn)) != ASM_INPUT - && asm_noperands (PATTERN (insn)) < 0) - { - /* The current insn is at the bundle start: emit the - template. */ - gcc_assert (template0 >= 0); - ia64_add_bundle_selector_before (template0, insn); - b = PREV_INSN (insn); - insn = b; - /* See comment above in analogous place for emitting nops - after the insn. */ - template0 = template1; - template1 = -1; - } - /* Emit nops after the current insn. */ - for (i = 0; i < curr_state->before_nops_num; i++) - { - nop = gen_nop (); - ia64_emit_insn_before (nop, insn); - nop = PREV_INSN (insn); - insn = nop; - pos--; - gcc_assert (pos >= 0); - if (pos % 3 == 0) - { - /* See comment above in analogous place for emitting nops - after the insn. */ - gcc_assert (template0 >= 0); - ia64_add_bundle_selector_before (template0, insn); - b = PREV_INSN (insn); - insn = b; - template0 = template1; - template1 = -1; - } - } - } - if (ia64_tune == PROCESSOR_ITANIUM) - /* Insert additional cycles for MM-insns (MMMUL and MMSHF). - Itanium1 has a strange design, if the distance between an insn - and dependent MM-insn is less 4 then we have a 6 additional - cycles stall. So we make the distance equal to 4 cycles if it - is less. */ - for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); - insn != NULL_RTX; - insn = next_insn) - { - gcc_assert (INSN_P (insn) - && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER); - next_insn = get_next_important_insn (NEXT_INSN (insn), tail); - if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)]) - /* We found a MM-insn which needs additional cycles. */ - { - rtx last; - int i, j, n; - int pred_stop_p; - - /* Now we are searching for a template of the bundle in - which the MM-insn is placed and the position of the - insn in the bundle (0, 1, 2). Also we are searching - for that there is a stop before the insn. */ - last = prev_active_insn (insn); - pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier; - if (pred_stop_p) - last = prev_active_insn (last); - n = 0; - for (;; last = prev_active_insn (last)) - if (recog_memoized (last) == CODE_FOR_bundle_selector) - { - template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0); - if (template0 == 9) - /* The insn is in MLX bundle. Change the template - onto MFI because we will add nops before the - insn. It simplifies subsequent code a lot. */ - PATTERN (last) - = gen_bundle_selector (const2_rtx); /* -> MFI */ - break; - } - else if (recog_memoized (last) != CODE_FOR_insn_group_barrier - && (ia64_safe_itanium_class (last) - != ITANIUM_CLASS_IGNORE)) - n++; - /* Some check of correctness: the stop is not at the - bundle start, there are no more 3 insns in the bundle, - and the MM-insn is not at the start of bundle with - template MLX. 
*/ - gcc_assert ((!pred_stop_p || n) - && n <= 2 - && (template0 != 9 || !n)); - /* Put nops after the insn in the bundle. */ - for (j = 3 - n; j > 0; j --) - ia64_emit_insn_before (gen_nop (), insn); - /* It takes into account that we will add more N nops - before the insn lately -- please see code below. */ - add_cycles [INSN_UID (insn)]--; - if (!pred_stop_p || add_cycles [INSN_UID (insn)]) - ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), - insn); - if (pred_stop_p) - add_cycles [INSN_UID (insn)]--; - for (i = add_cycles [INSN_UID (insn)]; i > 0; i--) - { - /* Insert "MII;" template. */ - ia64_emit_insn_before (gen_bundle_selector (const0_rtx), - insn); - ia64_emit_insn_before (gen_nop (), insn); - ia64_emit_insn_before (gen_nop (), insn); - if (i > 1) - { - /* To decrease code size, we use "MI;I;" - template. */ - ia64_emit_insn_before - (gen_insn_group_barrier (GEN_INT (3)), insn); - i--; - } - ia64_emit_insn_before (gen_nop (), insn); - ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), - insn); - } - /* Put the MM-insn in the same slot of a bundle with the - same template as the original one. */ - ia64_add_bundle_selector_before (template0, insn); - /* To put the insn in the same slot, add necessary number - of nops. */ - for (j = n; j > 0; j --) - ia64_emit_insn_before (gen_nop (), insn); - /* Put the stop if the original bundle had it. */ - if (pred_stop_p) - ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), - insn); - } - } - free (index_to_bundle_states); - finish_bundle_state_table (); - bundling_p = 0; - dfa_clean_insn_cache (); -} - -/* The following function is called at the end of scheduling BB or - EBB. After reload, it inserts stop bits and does insn bundling. */ - -static void -ia64_sched_finish (FILE *dump, int sched_verbose) -{ - if (sched_verbose) - fprintf (dump, "// Finishing schedule.\n"); - if (!reload_completed) - return; - if (reload_completed) - { - final_emit_insn_group_barriers (dump); - bundling (dump, sched_verbose, current_sched_info->prev_head, - current_sched_info->next_tail); - if (sched_verbose && dump) - fprintf (dump, "// finishing %d-%d\n", - INSN_UID (NEXT_INSN (current_sched_info->prev_head)), - INSN_UID (PREV_INSN (current_sched_info->next_tail))); - - return; - } -} - -/* The following function inserts stop bits in scheduled BB or EBB. */ - -static void -final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) -{ - rtx insn; - int need_barrier_p = 0; - rtx prev_insn = NULL_RTX; - - init_insn_group_barriers (); - - for (insn = NEXT_INSN (current_sched_info->prev_head); - insn != current_sched_info->next_tail; - insn = NEXT_INSN (insn)) - { - if (GET_CODE (insn) == BARRIER) - { - rtx last = prev_active_insn (insn); - - if (! 
- if (GET_CODE (last) == JUMP_INSN - && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) - last = prev_active_insn (last); - if (recog_memoized (last) != CODE_FOR_insn_group_barrier) - emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); - - init_insn_group_barriers (); - need_barrier_p = 0; - prev_insn = NULL_RTX; - } - else if (INSN_P (insn)) - { - if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) - { - init_insn_group_barriers (); - need_barrier_p = 0; - prev_insn = NULL_RTX; - } - else if (need_barrier_p || group_barrier_needed (insn)) - { - if (TARGET_EARLY_STOP_BITS) - { - rtx last; - - for (last = insn; - last != current_sched_info->prev_head; - last = PREV_INSN (last)) - if (INSN_P (last) && GET_MODE (last) == TImode - && stops_p [INSN_UID (last)]) - break; - if (last == current_sched_info->prev_head) - last = insn; - last = prev_active_insn (last); - if (last - && recog_memoized (last) != CODE_FOR_insn_group_barrier) - emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), - last); - init_insn_group_barriers (); - for (last = NEXT_INSN (last); - last != insn; - last = NEXT_INSN (last)) - if (INSN_P (last)) - group_barrier_needed (last); - } - else - { - emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), - insn); - init_insn_group_barriers (); - } - group_barrier_needed (insn); - prev_insn = NULL_RTX; - } - else if (recog_memoized (insn) >= 0) - prev_insn = insn; - need_barrier_p = (GET_CODE (insn) == CALL_INSN - || GET_CODE (PATTERN (insn)) == ASM_INPUT - || asm_noperands (PATTERN (insn)) >= 0); - } - } -} - - - -/* If the following function returns a nonzero value, we use the - multipass DFA insn scheduler with this value as the lookahead - depth. */ - -static int -ia64_first_cycle_multipass_dfa_lookahead (void) -{ - return (reload_completed ? 6 : 4); -} - -/* The following function initializes the variable `dfa_pre_cycle_insn'. */ - -static void -ia64_init_dfa_pre_cycle_insn (void) -{ - if (temp_dfa_state == NULL) - { - dfa_state_size = state_size (); - temp_dfa_state = xmalloc (dfa_state_size); - prev_cycle_state = xmalloc (dfa_state_size); - } - dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); - PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; - recog_memoized (dfa_pre_cycle_insn); - dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); - PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX; - recog_memoized (dfa_stop_insn); -} - -/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN - used by the DFA insn scheduler. */ - -static rtx -ia64_dfa_pre_cycle_insn (void) -{ - return dfa_pre_cycle_insn; -} - -/* The following function returns TRUE if PRODUCER (of type ilog or - ld) produces an address for CONSUMER (of type st or stf). */ - -int -ia64_st_address_bypass_p (rtx producer, rtx consumer) -{ - rtx dest, reg, mem; - - gcc_assert (producer && consumer); - dest = ia64_single_set (producer); - gcc_assert (dest); - reg = SET_DEST (dest); - gcc_assert (reg); - if (GET_CODE (reg) == SUBREG) - reg = SUBREG_REG (reg); - gcc_assert (GET_CODE (reg) == REG); - - dest = ia64_single_set (consumer); - gcc_assert (dest); - mem = SET_DEST (dest); - gcc_assert (mem && GET_CODE (mem) == MEM); - return reg_mentioned_p (reg, mem); -}
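These bypass predicates all reduce to one question: does the producer's destination register appear in the consumer's address expression? A minimal standalone analogue over a flat register encoding (hypothetical, for illustration only):

static int
feeds_address (int produced_reg, const int *addr_regs, int n_addr_regs)
{
  int i;

  for (i = 0; i < n_addr_regs; i++)
    if (addr_regs[i] == produced_reg)
      return 1;
  return 0;
}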
 - -/* The following function returns TRUE if PRODUCER (of type ilog or - ld) produces an address for CONSUMER (of type ld or fld). */ - -int -ia64_ld_address_bypass_p (rtx producer, rtx consumer) -{ - rtx dest, src, reg, mem; - - gcc_assert (producer && consumer); - dest = ia64_single_set (producer); - gcc_assert (dest); - reg = SET_DEST (dest); - gcc_assert (reg); - if (GET_CODE (reg) == SUBREG) - reg = SUBREG_REG (reg); - gcc_assert (GET_CODE (reg) == REG); - - src = ia64_single_set (consumer); - gcc_assert (src); - mem = SET_SRC (src); - gcc_assert (mem); - - if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) - mem = XVECEXP (mem, 0, 0); - else if (GET_CODE (mem) == IF_THEN_ELSE) - /* ??? Is this bypass necessary for ld.c? */ - { - gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR); - mem = XEXP (mem, 1); - } - - while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) - mem = XEXP (mem, 0); - - if (GET_CODE (mem) == UNSPEC) - { - int c = XINT (mem, 1); - - gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA); - mem = XVECEXP (mem, 0, 0); - } - - /* Note that LO_SUM is used for GOT loads. */ - gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM); - - return reg_mentioned_p (reg, mem); -} - -/* The following function returns TRUE if INSN produces an address for a - load/store insn. We place such insns into an M slot because that - decreases their latency. */ - -int -ia64_produce_address_p (rtx insn) -{ - return insn->call; -} - - -/* Emit pseudo-ops for the assembler to describe predicate relations. - At present this assumes that we only consider predicate pairs to - be mutex, and that the assembler can deduce proper values from - straight-line code. */ - -static void -emit_predicate_relation_info (void) -{ - basic_block bb; - - FOR_EACH_BB_REVERSE (bb) - { - int r; - rtx head = BB_HEAD (bb); - - /* We only need such notes at code labels. */ - if (GET_CODE (head) != CODE_LABEL) - continue; - if (GET_CODE (NEXT_INSN (head)) == NOTE - && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK) - head = NEXT_INSN (head); - - /* Skip p0, which may be thought to be live due to (reg:DI p0) - grabbing the entire block of predicate registers. */ - for (r = PR_REG (2); r < PR_REG (64); r += 2) - if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r)) - { - rtx p = gen_rtx_REG (BImode, r); - rtx n = emit_insn_after (gen_pred_rel_mutex (p), head); - if (head == BB_END (bb)) - BB_END (bb) = n; - head = n; - } - } - - /* Look for conditional calls that do not return, and protect predicate - relations around them. Otherwise the assembler will assume the call - returns, and complain about uses of call-clobbered predicates after - the call. */ - FOR_EACH_BB_REVERSE (bb) - { - rtx insn = BB_HEAD (bb); - - while (1) - { - if (GET_CODE (insn) == CALL_INSN - && GET_CODE (PATTERN (insn)) == COND_EXEC - && find_reg_note (insn, REG_NORETURN, NULL_RTX)) - { - rtx b = emit_insn_before (gen_safe_across_calls_all (), insn); - rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn); - if (BB_HEAD (bb) == insn) - BB_HEAD (bb) = b; - if (BB_END (bb) == insn) - BB_END (bb) = a; - } - - if (insn == BB_END (bb)) - break; - insn = NEXT_INSN (insn); - } - } -} - -/* Perform machine dependent operations on the rtl chain INSNS. */ - -static void -ia64_reorg (void) -{ - /* We are freeing block_for_insn in the toplev to keep compatibility - with old MDEP_REORGS that are not CFG based. Recompute it now. */ - compute_bb_for_insn (); - - /* If optimizing, we'll have split before scheduling. */ - if (optimize == 0) - split_all_insns (0); - - /* ??? 
update_life_info_in_dirty_blocks fails to terminate during - non-optimizing bootstrap. */ - update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES); - - if (optimize && ia64_flag_schedule_insns2) - { - timevar_push (TV_SCHED2); - ia64_final_schedule = 1; - - initiate_bundle_states (); - ia64_nop = make_insn_raw (gen_nop ()); - PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX; - recog_memoized (ia64_nop); - clocks_length = get_max_uid () + 1; - stops_p = xcalloc (1, clocks_length); - if (ia64_tune == PROCESSOR_ITANIUM) - { - clocks = xcalloc (clocks_length, sizeof (int)); - add_cycles = xcalloc (clocks_length, sizeof (int)); - } - if (ia64_tune == PROCESSOR_ITANIUM2) - { - pos_1 = get_cpu_unit_code ("2_1"); - pos_2 = get_cpu_unit_code ("2_2"); - pos_3 = get_cpu_unit_code ("2_3"); - pos_4 = get_cpu_unit_code ("2_4"); - pos_5 = get_cpu_unit_code ("2_5"); - pos_6 = get_cpu_unit_code ("2_6"); - _0mii_ = get_cpu_unit_code ("2b_0mii."); - _0mmi_ = get_cpu_unit_code ("2b_0mmi."); - _0mfi_ = get_cpu_unit_code ("2b_0mfi."); - _0mmf_ = get_cpu_unit_code ("2b_0mmf."); - _0bbb_ = get_cpu_unit_code ("2b_0bbb."); - _0mbb_ = get_cpu_unit_code ("2b_0mbb."); - _0mib_ = get_cpu_unit_code ("2b_0mib."); - _0mmb_ = get_cpu_unit_code ("2b_0mmb."); - _0mfb_ = get_cpu_unit_code ("2b_0mfb."); - _0mlx_ = get_cpu_unit_code ("2b_0mlx."); - _1mii_ = get_cpu_unit_code ("2b_1mii."); - _1mmi_ = get_cpu_unit_code ("2b_1mmi."); - _1mfi_ = get_cpu_unit_code ("2b_1mfi."); - _1mmf_ = get_cpu_unit_code ("2b_1mmf."); - _1bbb_ = get_cpu_unit_code ("2b_1bbb."); - _1mbb_ = get_cpu_unit_code ("2b_1mbb."); - _1mib_ = get_cpu_unit_code ("2b_1mib."); - _1mmb_ = get_cpu_unit_code ("2b_1mmb."); - _1mfb_ = get_cpu_unit_code ("2b_1mfb."); - _1mlx_ = get_cpu_unit_code ("2b_1mlx."); - } - else - { - pos_1 = get_cpu_unit_code ("1_1"); - pos_2 = get_cpu_unit_code ("1_2"); - pos_3 = get_cpu_unit_code ("1_3"); - pos_4 = get_cpu_unit_code ("1_4"); - pos_5 = get_cpu_unit_code ("1_5"); - pos_6 = get_cpu_unit_code ("1_6"); - _0mii_ = get_cpu_unit_code ("1b_0mii."); - _0mmi_ = get_cpu_unit_code ("1b_0mmi."); - _0mfi_ = get_cpu_unit_code ("1b_0mfi."); - _0mmf_ = get_cpu_unit_code ("1b_0mmf."); - _0bbb_ = get_cpu_unit_code ("1b_0bbb."); - _0mbb_ = get_cpu_unit_code ("1b_0mbb."); - _0mib_ = get_cpu_unit_code ("1b_0mib."); - _0mmb_ = get_cpu_unit_code ("1b_0mmb."); - _0mfb_ = get_cpu_unit_code ("1b_0mfb."); - _0mlx_ = get_cpu_unit_code ("1b_0mlx."); - _1mii_ = get_cpu_unit_code ("1b_1mii."); - _1mmi_ = get_cpu_unit_code ("1b_1mmi."); - _1mfi_ = get_cpu_unit_code ("1b_1mfi."); - _1mmf_ = get_cpu_unit_code ("1b_1mmf."); - _1bbb_ = get_cpu_unit_code ("1b_1bbb."); - _1mbb_ = get_cpu_unit_code ("1b_1mbb."); - _1mib_ = get_cpu_unit_code ("1b_1mib."); - _1mmb_ = get_cpu_unit_code ("1b_1mmb."); - _1mfb_ = get_cpu_unit_code ("1b_1mfb."); - _1mlx_ = get_cpu_unit_code ("1b_1mlx."); - } - schedule_ebbs (); - finish_bundle_states (); - if (ia64_tune == PROCESSOR_ITANIUM) - { - free (add_cycles); - free (clocks); - } - free (stops_p); - stops_p = NULL; - emit_insn_group_barriers (dump_file); - - ia64_final_schedule = 0; - timevar_pop (TV_SCHED2); - } - else - emit_all_insn_group_barriers (dump_file); - - /* A call must not be the last instruction in a function, so that the - return address is still within the function, so that unwinding works - properly. Note that IA-64 differs from dwarf2 on this point. 
*/ - if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) - { - rtx insn; - int saw_stop = 0; - - insn = get_last_insn (); - if (! INSN_P (insn)) - insn = prev_active_insn (insn); - /* Skip over insns that expand to nothing. */ - while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES) - { - if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE - && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) - saw_stop = 1; - insn = prev_active_insn (insn); - } - if (GET_CODE (insn) == CALL_INSN) - { - if (! saw_stop) - emit_insn (gen_insn_group_barrier (GEN_INT (3))); - emit_insn (gen_break_f ()); - emit_insn (gen_insn_group_barrier (GEN_INT (3))); - } - } - - emit_predicate_relation_info (); - - if (ia64_flag_var_tracking) - { - timevar_push (TV_VAR_TRACKING); - variable_tracking_main (); - timevar_pop (TV_VAR_TRACKING); - } -} - -/* Return true if REGNO is used by the epilogue. */ - -int -ia64_epilogue_uses (int regno) -{ - switch (regno) - { - case R_GR (1): - /* With a call to a function in another module, we will write a new - value to "gp". After returning from such a call, we need to make - sure the function restores the original gp-value, even if the - function itself does not use the gp anymore. */ - return !(TARGET_AUTO_PIC || TARGET_NO_PIC); - - case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): - case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): - /* For functions defined with the syscall_linkage attribute, all - input registers are marked as live at all function exits. This - prevents the register allocator from using the input registers, - which in turn makes it possible to restart a system call after - an interrupt without having to save/restore the input registers. - This also prevents kernel data from leaking to application code. */ - return lookup_attribute ("syscall_linkage", - TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; - - case R_BR (0): - /* Conditional return patterns can't represent the use of `b0' as - the return address, so we force the value live this way. */ - return 1; - - case AR_PFS_REGNUM: - /* Likewise for ar.pfs, which is used by br.ret. */ - return 1; - - default: - return 0; - } -} - -/* Return true if REGNO is used by the frame unwinder. */ - -int -ia64_eh_uses (int regno) -{ - if (! reload_completed) - return 0; - - if (current_frame_info.reg_save_b0 - && regno == current_frame_info.reg_save_b0) - return 1; - if (current_frame_info.reg_save_pr - && regno == current_frame_info.reg_save_pr) - return 1; - if (current_frame_info.reg_save_ar_pfs - && regno == current_frame_info.reg_save_ar_pfs) - return 1; - if (current_frame_info.reg_save_ar_unat - && regno == current_frame_info.reg_save_ar_unat) - return 1; - if (current_frame_info.reg_save_ar_lc - && regno == current_frame_info.reg_save_ar_lc) - return 1; - - return 0; -} - -/* Return true if this goes in small data/bss. */ - -/* ??? We could also support our own long data here, generating - movl/add/ld8 instead of addl/ld8/ld8. This makes the code bigger, but - should make the code faster because there is one less load. This - also includes incomplete types which can't go in sdata/sbss. */ - -static bool -ia64_in_small_data_p (tree exp) -{ - if (TARGET_NO_SDATA) - return false; - - /* We want to merge strings, so we never consider them small data. */ - if (TREE_CODE (exp) == STRING_CST) - return false;
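The classification that continues below hinges on one size test. A standalone sketch of that test (hypothetical names; the threshold parameter stands in for ia64_section_threshold, the per-target small-data size limit):

/* Incomplete types report size <= 0 and must be rejected, since they
   might grow past the threshold once completed.  */
static int
small_data_size_ok (long size, unsigned long threshold)
{
  return size > 0 && (unsigned long) size <= threshold;
}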
- - /* Functions are never small data. */ - if (TREE_CODE (exp) == FUNCTION_DECL) - return false; - - if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) - { - const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); - - if (strcmp (section, ".sdata") == 0 - || strncmp (section, ".sdata.", 7) == 0 - || strncmp (section, ".gnu.linkonce.s.", 16) == 0 - || strcmp (section, ".sbss") == 0 - || strncmp (section, ".sbss.", 6) == 0 - || strncmp (section, ".gnu.linkonce.sb.", 17) == 0) - return true; - } - else - { - HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); - - /* If this is an incomplete type with size 0, then we can't put it - in sdata because it might be too big when completed. */ - if (size > 0 && size <= ia64_section_threshold) - return true; - } - - return false; -} - -/* Output assembly directives for prologue regions. */ - -/* True if the current basic block is the last one in the function. */ - -static bool last_block; - -/* True if we need a copy_state command at the start of the next block. */ - -static bool need_copy_state; - -#ifndef MAX_ARTIFICIAL_LABEL_BYTES -# define MAX_ARTIFICIAL_LABEL_BYTES 30 -#endif - -/* Emit a debugging label after a call-frame-related insn. We'd - rather output the label right away, but we'd have to output it - after, not before, the instruction, and the instruction has not - been output yet. So we emit the label after the insn, delete it to - avoid introducing basic blocks, and mark it as preserved, such that - it is still output, given that it is referenced in debug info. */ - -static const char * -ia64_emit_deleted_label_after_insn (rtx insn) -{ - char label[MAX_ARTIFICIAL_LABEL_BYTES]; - rtx lb = gen_label_rtx (); - rtx label_insn = emit_label_after (lb, insn); - - LABEL_PRESERVE_P (lb) = 1; - - delete_insn (label_insn); - - ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn)); - - return xstrdup (label); -} - -/* Define the CFA after INSN with the steady-state definition. */ - -static void -ia64_dwarf2out_def_steady_cfa (rtx insn) -{ - rtx fp = frame_pointer_needed - ? hard_frame_pointer_rtx - : stack_pointer_rtx; - - dwarf2out_def_cfa - (ia64_emit_deleted_label_after_insn (insn), - REGNO (fp), - ia64_initial_elimination_offset - (REGNO (arg_pointer_rtx), REGNO (fp)) - + ARG_POINTER_CFA_OFFSET (current_function_decl)); -} - -/* The generic dwarf2 frame debug info generator does not define a - separate region for the very end of the epilogue, so refrain from - doing so in the IA64-specific code as well. */ - -#define IA64_CHANGE_CFA_IN_EPILOGUE 0 - -/* The function emits unwind directives for the start of an epilogue. */ - -static void -process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame) -{ - /* If this isn't the last block of the function, then we need to label the - current state, and copy it back in at the start of the next block. */ - - if (!last_block) - { - if (unwind) - fprintf (asm_out_file, "\t.label_state %d\n", - ++cfun->machine->state_num); - need_copy_state = true; - } - - if (unwind) - fprintf (asm_out_file, "\t.restore sp\n"); - if (IA64_CHANGE_CFA_IN_EPILOGUE && frame) - dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn), - STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET); -}
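A sketch of the directive protocol process_epilogue implements, as a standalone emitter (hypothetical helper; the real code additionally handles the CFA and frame-pointer cases):

#include <stdio.h>

static void
emit_epilogue_unwind (FILE *f, int *state_num, int is_last_block)
{
  /* A non-final epilogue must label the unwind state so the next
     block can restore it with ".copy_state".  */
  if (!is_last_block)
    fprintf (f, "\t.label_state %d\n", ++*state_num);
  fprintf (f, "\t.restore sp\n");
}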
 - -/* This function processes a SET pattern looking for specific patterns - which result in emitting an assembly directive required for unwinding. */ - -static int -process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame) -{ - rtx src = SET_SRC (pat); - rtx dest = SET_DEST (pat); - int src_regno, dest_regno; - - /* Look for the ALLOC insn. */ - if (GET_CODE (src) == UNSPEC_VOLATILE - && XINT (src, 1) == UNSPECV_ALLOC - && GET_CODE (dest) == REG) - { - dest_regno = REGNO (dest); - - /* If this is the final destination for ar.pfs, then this must - be the alloc in the prologue. */ - if (dest_regno == current_frame_info.reg_save_ar_pfs) - { - if (unwind) - fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", - ia64_dbx_register_number (dest_regno)); - } - else - { - /* This must be an alloc before a sibcall. We must drop the - old frame info. The easiest way to drop the old frame - info is to ensure we had a ".restore sp" directive - followed by a new prologue. If the procedure doesn't - have a memory-stack frame, we'll issue a dummy ".restore - sp" now. */ - if (current_frame_info.total_size == 0 && !frame_pointer_needed) - /* If we haven't done process_epilogue () yet, do it now. */ - process_epilogue (asm_out_file, insn, unwind, frame); - if (unwind) - fprintf (asm_out_file, "\t.prologue\n"); - } - return 1; - } - - /* Look for SP = .... */ - if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM) - { - if (GET_CODE (src) == PLUS) - { - rtx op0 = XEXP (src, 0); - rtx op1 = XEXP (src, 1); - - gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT); - - if (INTVAL (op1) < 0) - { - gcc_assert (!frame_pointer_needed); - if (unwind) - fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n", - -INTVAL (op1)); - if (frame) - ia64_dwarf2out_def_steady_cfa (insn); - } - else - process_epilogue (asm_out_file, insn, unwind, frame); - } - else - { - gcc_assert (GET_CODE (src) == REG - && REGNO (src) == HARD_FRAME_POINTER_REGNUM); - process_epilogue (asm_out_file, insn, unwind, frame); - } - - return 1; - } - - /* Register move we need to look at. */ - if (GET_CODE (dest) == REG && GET_CODE (src) == REG) - { - src_regno = REGNO (src); - dest_regno = REGNO (dest); - - switch (src_regno) - { - case BR_REG (0): - /* Saving return address pointer. */ - gcc_assert (dest_regno == current_frame_info.reg_save_b0); - if (unwind) - fprintf (asm_out_file, "\t.save rp, r%d\n", - ia64_dbx_register_number (dest_regno)); - return 1; - - case PR_REG (0): - gcc_assert (dest_regno == current_frame_info.reg_save_pr); - if (unwind) - fprintf (asm_out_file, "\t.save pr, r%d\n", - ia64_dbx_register_number (dest_regno)); - return 1; - - case AR_UNAT_REGNUM: - gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat); - if (unwind) - fprintf (asm_out_file, "\t.save ar.unat, r%d\n", - ia64_dbx_register_number (dest_regno)); - return 1; - - case AR_LC_REGNUM: - gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc); - if (unwind) - fprintf (asm_out_file, "\t.save ar.lc, r%d\n", - ia64_dbx_register_number (dest_regno)); - return 1; - - case STACK_POINTER_REGNUM: - gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM - && frame_pointer_needed); - if (unwind) - fprintf (asm_out_file, "\t.vframe r%d\n", - ia64_dbx_register_number (dest_regno)); - if (frame) - ia64_dwarf2out_def_steady_cfa (insn); - return 1; - - default: - /* Everything else should indicate being stored to memory. */ - gcc_unreachable (); - } - } - - /* Memory store we need to look at. 
*/ - if (GET_CODE (dest) == MEM && GET_CODE (src) == REG) - { - long off; - rtx base; - const char *saveop; - - if (GET_CODE (XEXP (dest, 0)) == REG) - { - base = XEXP (dest, 0); - off = 0; - } - else - { - gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS - && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT); - base = XEXP (XEXP (dest, 0), 0); - off = INTVAL (XEXP (XEXP (dest, 0), 1)); - } - - if (base == hard_frame_pointer_rtx) - { - saveop = ".savepsp"; - off = - off; - } - else - { - gcc_assert (base == stack_pointer_rtx); - saveop = ".savesp"; - } - - src_regno = REGNO (src); - switch (src_regno) - { - case BR_REG (0): - gcc_assert (!current_frame_info.reg_save_b0); - if (unwind) - fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off); - return 1; - - case PR_REG (0): - gcc_assert (!current_frame_info.reg_save_pr); - if (unwind) - fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off); - return 1; - - case AR_LC_REGNUM: - gcc_assert (!current_frame_info.reg_save_ar_lc); - if (unwind) - fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off); - return 1; - - case AR_PFS_REGNUM: - gcc_assert (!current_frame_info.reg_save_ar_pfs); - if (unwind) - fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off); - return 1; - - case AR_UNAT_REGNUM: - gcc_assert (!current_frame_info.reg_save_ar_unat); - if (unwind) - fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off); - return 1; - - case GR_REG (4): - case GR_REG (5): - case GR_REG (6): - case GR_REG (7): - if (unwind) - fprintf (asm_out_file, "\t.save.g 0x%x\n", - 1 << (src_regno - GR_REG (4))); - return 1; - - case BR_REG (1): - case BR_REG (2): - case BR_REG (3): - case BR_REG (4): - case BR_REG (5): - if (unwind) - fprintf (asm_out_file, "\t.save.b 0x%x\n", - 1 << (src_regno - BR_REG (1))); - return 1; - - case FR_REG (2): - case FR_REG (3): - case FR_REG (4): - case FR_REG (5): - if (unwind) - fprintf (asm_out_file, "\t.save.f 0x%x\n", - 1 << (src_regno - FR_REG (2))); - return 1; - - case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): - case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): - case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): - case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): - if (unwind) - fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", - 1 << (src_regno - FR_REG (12))); - return 1; - - default: - return 0; - } - } - - return 0; -} - - -/* This function looks at a single insn and emits any directives - required to unwind this insn. */ -void -process_for_unwind_directive (FILE *asm_out_file, rtx insn) -{ - bool unwind = (flag_unwind_tables - || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)); - bool frame = dwarf2out_do_frame (); - - if (unwind || frame) - { - rtx pat; - - if (GET_CODE (insn) == NOTE - && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) - { - last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR; - - /* Restore unwind state from immediately before the epilogue. */ - if (need_copy_state) - { - if (unwind) - { - fprintf (asm_out_file, "\t.body\n"); - fprintf (asm_out_file, "\t.copy_state %d\n", - cfun->machine->state_num); - } - if (IA64_CHANGE_CFA_IN_EPILOGUE && frame) - ia64_dwarf2out_def_steady_cfa (insn); - need_copy_state = false; - } - } - - if (GET_CODE (insn) == NOTE || ! 
RTX_FRAME_RELATED_P (insn)) - return; - - pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); - if (pat) - pat = XEXP (pat, 0); - else - pat = PATTERN (insn); - - switch (GET_CODE (pat)) - { - case SET: - process_set (asm_out_file, pat, insn, unwind, frame); - break; - - case PARALLEL: - { - int par_index; - int limit = XVECLEN (pat, 0); - for (par_index = 0; par_index < limit; par_index++) - { - rtx x = XVECEXP (pat, 0, par_index); - if (GET_CODE (x) == SET) - process_set (asm_out_file, x, insn, unwind, frame); - } - break; - } - - default: - gcc_unreachable (); - } - } -} - - -enum ia64_builtins -{ - IA64_BUILTIN_BSP, - IA64_BUILTIN_FLUSHRS -}; - -void -ia64_init_builtins (void) -{ - tree fpreg_type; - tree float80_type; - - /* The __fpreg type. */ - fpreg_type = make_node (REAL_TYPE); - TYPE_PRECISION (fpreg_type) = 82; - layout_type (fpreg_type); - (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg"); - - /* The __float80 type. */ - float80_type = make_node (REAL_TYPE); - TYPE_PRECISION (float80_type) = 80; - layout_type (float80_type); - (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); - - /* The __float128 type. */ - if (!TARGET_HPUX) - { - tree float128_type = make_node (REAL_TYPE); - TYPE_PRECISION (float128_type) = 128; - layout_type (float128_type); - (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); - } - else - /* Under HPUX, this is a synonym for "long double". */ - (*lang_hooks.types.register_builtin_type) (long_double_type_node, - "__float128"); - -#define def_builtin(name, type, code) \ - lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD, \ - NULL, NULL_TREE) - - def_builtin ("__builtin_ia64_bsp", - build_function_type (ptr_type_node, void_list_node), - IA64_BUILTIN_BSP); - - def_builtin ("__builtin_ia64_flushrs", - build_function_type (void_type_node, void_list_node), - IA64_BUILTIN_FLUSHRS); - -#undef def_builtin -} - -rtx -ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) -{ - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - unsigned int fcode = DECL_FUNCTION_CODE (fndecl); - - switch (fcode) - { - case IA64_BUILTIN_BSP: - if (! target || ! register_operand (target, DImode)) - target = gen_reg_rtx (DImode); - emit_insn (gen_bsp_value (target)); -#ifdef POINTERS_EXTEND_UNSIGNED - target = convert_memory_address (ptr_mode, target); -#endif - return target; - - case IA64_BUILTIN_FLUSHRS: - emit_insn (gen_flushrs ()); - return const0_rtx; - - default: - break; - } - - return NULL_RTX; -} - -/* On HP-UX IA64, aggregate parameters are passed in the - most significant bits of the stack slot. */ - -enum direction -ia64_hpux_function_arg_padding (enum machine_mode mode, tree type) -{ - /* Exception to normal case for structures/unions/etc. */ - - if (type && AGGREGATE_TYPE_P (type) - && int_size_in_bytes (type) < UNITS_PER_WORD) - return upward; - - /* Fall back to the default. */ - return DEFAULT_FUNCTION_ARG_PADDING (mode, type); -}
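The two builtins registered above can be exercised directly; a usage sketch (only meaningful when compiling for IA-64 with GCC):

void *
backing_store_top (void)
{
  /* Force dirty stacked registers out to the backing store, then
     read ar.bsp via the builtin.  */
  __builtin_ia64_flushrs ();
  return __builtin_ia64_bsp ();
}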
 - -/* Emit text to declare externally defined variables and functions, because - the Intel assembler does not support undefined externals. */ - -void -ia64_asm_output_external (FILE *file, tree decl, const char *name) -{ - /* We output the name if and only if TREE_SYMBOL_REFERENCED is - set in order to avoid putting out names that are never really - used. */ - if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) - { - /* maybe_assemble_visibility will return 1 if the assembler - visibility directive is output. */ - int need_visibility = ((*targetm.binds_local_p) (decl) - && maybe_assemble_visibility (decl)); - - /* GNU as does not need anything here, but the HP linker does - need something for external functions. */ - if ((TARGET_HPUX_LD || !TARGET_GNU_AS) - && TREE_CODE (decl) == FUNCTION_DECL) - { - ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); - (*targetm.asm_out.globalize_label) (file, name); - } - else if (need_visibility && !TARGET_GNU_AS) - (*targetm.asm_out.globalize_label) (file, name); - } -} - -/* Set SImode div/mod functions; init_integral_libfuncs only initializes - modes of word_mode and larger. Rename the TFmode libfuncs using the - HPUX conventions. __divtf3 is used for XFmode; we need to keep it for - backward compatibility. */ - -static void -ia64_init_libfuncs (void) -{ - set_optab_libfunc (sdiv_optab, SImode, "__divsi3"); - set_optab_libfunc (udiv_optab, SImode, "__udivsi3"); - set_optab_libfunc (smod_optab, SImode, "__modsi3"); - set_optab_libfunc (umod_optab, SImode, "__umodsi3"); - - set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); - set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); - set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); - set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); - set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); - - set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); - set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); - set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad"); - set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); - set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); - set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80"); - - set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl"); - set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); - set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad"); - set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl"); - set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl"); - - set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); - set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); - set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad"); - /* HP-UX 11.23 libc does not have a function for unsigned - SImode-to-TFmode conversion. */ - set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad"); -} - -/* Rename all the TFmode libfuncs using the HPUX conventions. */ - -static void -ia64_hpux_init_libfuncs (void) -{ - ia64_init_libfuncs (); - - /* The HP SI millicode division and mod functions expect DI arguments. - By turning them off completely we avoid using both libgcc and the - non-standard millicode routines and use the HP DI millicode routines - instead. */ - - set_optab_libfunc (sdiv_optab, SImode, 0); - set_optab_libfunc (udiv_optab, SImode, 0); - set_optab_libfunc (smod_optab, SImode, 0); - set_optab_libfunc (umod_optab, SImode, 0); - - set_optab_libfunc (sdiv_optab, DImode, "__milli_divI"); - set_optab_libfunc (udiv_optab, DImode, "__milli_divU"); - set_optab_libfunc (smod_optab, DImode, "__milli_remI"); - set_optab_libfunc (umod_optab, DImode, "__milli_remU");
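The HP-UX quad-float naming convention followed throughout these calls can be summarized as a plain table; an illustrative subset (hypothetical struct, for reference only):

struct quad_libfunc { const char *operation; const char *hpux_name; };

static const struct quad_libfunc quad_libfuncs[] = {
  { "add", "_U_Qfadd" },
  { "sub", "_U_Qfsub" },
  { "mul", "_U_Qfmpy" },
  { "div", "_U_Qfdiv" },
  { "neg", "_U_Qfneg" },
};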
- - /* HP-UX libc has TF min/max/abs routines in it. */ - set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin"); - set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); - set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); - - /* ia64_expand_compare uses this. */ - cmptf_libfunc = init_one_libfunc ("_U_Qfcmp"); - - /* These should never be used. */ - set_optab_libfunc (eq_optab, TFmode, 0); - set_optab_libfunc (ne_optab, TFmode, 0); - set_optab_libfunc (gt_optab, TFmode, 0); - set_optab_libfunc (ge_optab, TFmode, 0); - set_optab_libfunc (lt_optab, TFmode, 0); - set_optab_libfunc (le_optab, TFmode, 0); -} - -/* Rename the division and modulus functions in VMS. */ - -static void -ia64_vms_init_libfuncs (void) -{ - set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); - set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); - set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); - set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); - set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); - set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); - set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); - set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); -} - -/* Rename the TFmode libfuncs available from soft-fp in glibc using - the HPUX conventions. */ - -static void -ia64_sysv4_init_libfuncs (void) -{ - ia64_init_libfuncs (); - - /* These functions are not part of the HPUX TFmode interface. We - use them instead of _U_Qfcmp, which doesn't work the way we - expect. */ - set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); - set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); - set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); - set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); - set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); - set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); - - /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in - glibc doesn't have them. */ -} - -/* For HPUX, it is illegal to have relocations in shared segments. */ - -static int -ia64_hpux_reloc_rw_mask (void) -{ - return 3; -} - -/* For others, relax this so that relocations to local data go in - read-only segments, but we still cannot allow global relocations - in read-only segments. */ - -static int -ia64_reloc_rw_mask (void) -{ - return flag_pic ? 3 : 2; -} - -/* Return the section to use for X. The only special thing we do here - is to honor small data. */ - -static section * -ia64_select_rtx_section (enum machine_mode mode, rtx x, - unsigned HOST_WIDE_INT align) -{ - if (GET_MODE_SIZE (mode) > 0 - && GET_MODE_SIZE (mode) <= ia64_section_threshold - && !TARGET_NO_SDATA) - return sdata_section; - else - return default_elf_select_rtx_section (mode, x, align); -} - -static unsigned int -ia64_section_type_flags (tree decl, const char *name, int reloc) -{ - unsigned int flags = 0; - - if (strcmp (name, ".sdata") == 0 - || strncmp (name, ".sdata.", 7) == 0 - || strncmp (name, ".gnu.linkonce.s.", 16) == 0 - || strncmp (name, ".sdata2.", 8) == 0 - || strncmp (name, ".gnu.linkonce.s2.", 17) == 0 - || strcmp (name, ".sbss") == 0 - || strncmp (name, ".sbss.", 6) == 0 - || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) - flags = SECTION_SMALL; - - flags |= default_section_type_flags (decl, name, reloc); - return flags; -} - -/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a - structure type and the address of that type should be passed - in out0, rather than in r8. 
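// An aside on the rw-mask convention used by the two hooks above
// (my reading of the code, stated as an assumption): bit 1 covers
// relocations to global symbols, bit 0 those to local symbols, and a
// set bit forces such data into a writable section.
static int
needs_writable_section (int reloc_kinds, int rw_mask)
{
  return (reloc_kinds & rw_mask) != 0;
}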
*/ - -static bool -ia64_struct_retval_addr_is_first_parm_p (tree fntype) -{ - tree ret_type = TREE_TYPE (fntype); - - /* The Itanium C++ ABI requires that out0, rather than r8, be used - as the structure return address parameter, if the return value - type has a non-trivial copy constructor or destructor. It is not - clear if this same convention should be used for other - programming languages. Until G++ 3.4, we incorrectly used r8 for - these return values. */ - return (abi_version_at_least (2) - && ret_type - && TYPE_MODE (ret_type) == BLKmode - && TREE_ADDRESSABLE (ret_type) - && strcmp (lang_hooks.name, "GNU C++") == 0); -} - -/* Output the assembler code for a thunk function. THUNK_DECL is the - declaration for the thunk function itself, FUNCTION is the decl for - the target function. DELTA is an immediate constant offset to be - added to THIS. If VCALL_OFFSET is nonzero, the word at - *(*this + vcall_offset) should be added to THIS. */ - -static void -ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, - tree function) -{ - rtx this, insn, funexp; - unsigned int this_parmno; - unsigned int this_regno; - - reload_completed = 1; - epilogue_completed = 1; - no_new_pseudos = 1; - reset_block_changes (); - - /* Set things up as ia64_expand_prologue might. */ - last_scratch_gr_reg = 15; - - memset (¤t_frame_info, 0, sizeof (current_frame_info)); - current_frame_info.spill_cfa_off = -16; - current_frame_info.n_input_regs = 1; - current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); - - /* Mark the end of the (empty) prologue. */ - emit_note (NOTE_INSN_PROLOGUE_END); - - /* Figure out whether "this" will be the first parameter (the - typical case) or the second parameter (as happens when the - virtual function returns certain class objects). */ - this_parmno - = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk)) - ? 1 : 0); - this_regno = IN_REG (this_parmno); - if (!TARGET_REG_NAMES) - reg_names[this_regno] = ia64_reg_numbers[this_parmno]; - - this = gen_rtx_REG (Pmode, this_regno); - if (TARGET_ILP32) - { - rtx tmp = gen_rtx_REG (ptr_mode, this_regno); - REG_POINTER (tmp) = 1; - if (delta && CONST_OK_FOR_I (delta)) - { - emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta))); - delta = 0; - } - else - emit_insn (gen_ptr_extend (this, tmp)); - } - - /* Apply the constant offset, if required. */ - if (delta) - { - rtx delta_rtx = GEN_INT (delta); - - if (!CONST_OK_FOR_I (delta)) - { - rtx tmp = gen_rtx_REG (Pmode, 2); - emit_move_insn (tmp, delta_rtx); - delta_rtx = tmp; - } - emit_insn (gen_adddi3 (this, this, delta_rtx)); - } - - /* Apply the offset from the vtable, if required. 
*/ - if (vcall_offset) - { - rtx vcall_offset_rtx = GEN_INT (vcall_offset); - rtx tmp = gen_rtx_REG (Pmode, 2); - - if (TARGET_ILP32) - { - rtx t = gen_rtx_REG (ptr_mode, 2); - REG_POINTER (t) = 1; - emit_move_insn (t, gen_rtx_MEM (ptr_mode, this)); - if (CONST_OK_FOR_I (vcall_offset)) - { - emit_insn (gen_ptr_extend_plus_imm (tmp, t, - vcall_offset_rtx)); - vcall_offset = 0; - } - else - emit_insn (gen_ptr_extend (tmp, t)); - } - else - emit_move_insn (tmp, gen_rtx_MEM (Pmode, this)); - - if (vcall_offset) - { - if (!CONST_OK_FOR_J (vcall_offset)) - { - rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ()); - emit_move_insn (tmp2, vcall_offset_rtx); - vcall_offset_rtx = tmp2; - } - emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); - } - - if (TARGET_ILP32) - emit_move_insn (gen_rtx_REG (ptr_mode, 2), - gen_rtx_MEM (ptr_mode, tmp)); - else - emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); - - emit_insn (gen_adddi3 (this, this, tmp)); - } - - /* Generate a tail call to the target function. */ - if (! TREE_USED (function)) - { - assemble_external (function); - TREE_USED (function) = 1; - } - funexp = XEXP (DECL_RTL (function), 0); - funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); - ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1); - insn = get_last_insn (); - SIBLING_CALL_P (insn) = 1; - - /* Code generation for calls relies on splitting. */ - reload_completed = 1; - epilogue_completed = 1; - try_split (PATTERN (insn), insn, 0); - - emit_barrier (); - - /* Run just enough of rest_of_compilation to get the insns emitted. - There's not really enough bulk here to make other passes such as - instruction scheduling worthwhile. Note that use_thunk calls - assemble_start_function and assemble_end_function. */ - - insn_locators_initialize (); - emit_all_insn_group_barriers (NULL); - insn = get_insns (); - shorten_branches (insn); - final_start_function (insn, file, 1); - final (insn, file, 1); - final_end_function (); - - reload_completed = 0; - epilogue_completed = 0; - no_new_pseudos = 0; -} - -/* Worker function for TARGET_STRUCT_VALUE_RTX. */ - -static rtx -ia64_struct_value_rtx (tree fntype, - int incoming ATTRIBUTE_UNUSED) -{ - if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)) - return NULL_RTX; - return gen_rtx_REG (Pmode, GR_REG (8)); -} - -static bool -ia64_scalar_mode_supported_p (enum machine_mode mode) -{ - switch (mode) - { - case QImode: - case HImode: - case SImode: - case DImode: - case TImode: - return true; - - case SFmode: - case DFmode: - case XFmode: - case RFmode: - return true; - - case TFmode: - return TARGET_HPUX; - - default: - return false; - } -} - -static bool -ia64_vector_mode_supported_p (enum machine_mode mode) -{ - switch (mode) - { - case V8QImode: - case V4HImode: - case V2SImode: - return true; - - case V2SFmode: - return true; - - default: - return false; - } -} - -/* Implement the FUNCTION_PROFILER macro. */ - -void -ia64_output_function_profiler (FILE *file, int labelno) -{ - bool indirect_call; - - /* If the function needs a static chain and the static chain - register is r15, we use an indirect call so as to bypass - the PLT stub in case the executable is dynamically linked, - because the stub clobbers r15 as per 5.3.6 of the psABI. - We don't need to do that in non-canonical PIC mode. 
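Stepping back to the thunk logic above: the adjustment it emits is, in plain C terms (hypothetical helper, not GCC source):

static char *
thunk_adjust_this (char *this_ptr, long delta, long vcall_offset)
{
  this_ptr += delta;                              // constant offset first
  if (vcall_offset)
    {
      char *vptr = *(char **) this_ptr;           // load the vtable pointer
      this_ptr += *(long *) (vptr + vcall_offset); // add *(vptr + offset)
    }
  return this_ptr;
}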
*/ - - if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC) - { - gcc_assert (STATIC_CHAIN_REGNUM == 15); - indirect_call = true; - } - else - indirect_call = false; - - if (TARGET_GNU_AS) - fputs ("\t.prologue 4, r40\n", file); - else - fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file); - fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file); - - if (NO_PROFILE_COUNTERS) - fputs ("\tmov out3 = r0\n", file); - else - { - char buf[20]; - ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); - - if (TARGET_AUTO_PIC) - fputs ("\tmovl out3 = @gprel(", file); - else - fputs ("\taddl out3 = @ltoff(", file); - assemble_name (file, buf); - if (TARGET_AUTO_PIC) - fputs (")\n", file); - else - fputs ("), r1\n", file); - } - - if (indirect_call) - fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file); - fputs ("\t;;\n", file); - - fputs ("\t.save rp, r42\n", file); - fputs ("\tmov out2 = b0\n", file); - if (indirect_call) - fputs ("\tld8 r14 = [r14]\n\t;;\n", file); - fputs ("\t.body\n", file); - fputs ("\tmov out1 = r1\n", file); - if (indirect_call) - { - fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file); - fputs ("\tmov b6 = r16\n", file); - fputs ("\tld8 r1 = [r14]\n", file); - fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file); - } - else - fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file); -} - -static GTY(()) rtx mcount_func_rtx; -static rtx -gen_mcount_func_rtx (void) -{ - if (!mcount_func_rtx) - mcount_func_rtx = init_one_libfunc ("_mcount"); - return mcount_func_rtx; -} - -void -ia64_profile_hook (int labelno) -{ - rtx label, ip; - - if (NO_PROFILE_COUNTERS) - label = const0_rtx; - else - { - char buf[30]; - const char *label_name; - ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); - label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf)); - label = gen_rtx_SYMBOL_REF (Pmode, label_name); - SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL; - } - ip = gen_reg_rtx (Pmode); - emit_insn (gen_ip_value (ip)); - emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL, - VOIDmode, 3, - gen_rtx_REG (Pmode, BR_REG (0)), Pmode, - ip, Pmode, - label, Pmode); -} - -/* Return the mangling of TYPE if it is an extended fundamental type. */ - -static const char * -ia64_mangle_fundamental_type (tree type) -{ - /* On HP-UX, "long double" is mangled as "e" so __float128 is - mangled as "e". */ - if (!TARGET_HPUX && TYPE_MODE (type) == TFmode) - return "g"; - /* On HP-UX, "e" is not available as a mangling of __float80 so use - an extended mangling. Elsewhere, "e" is available since long - double is 80 bits. */ - if (TYPE_MODE (type) == XFmode) - return TARGET_HPUX ? "u9__float80" : "e"; - if (TYPE_MODE (type) == RFmode) - return "u7__fpreg"; - return NULL; -} - -/* Return the diagnostic message string if conversion from FROMTYPE to - TOTYPE is not allowed, NULL otherwise. */ -static const char * -ia64_invalid_conversion (tree fromtype, tree totype) -{ - /* Reject nontrivial conversion to or from __fpreg. */ - if (TYPE_MODE (fromtype) == RFmode - && TYPE_MODE (totype) != RFmode - && TYPE_MODE (totype) != VOIDmode) - return N_("invalid conversion from %<__fpreg%>"); - if (TYPE_MODE (totype) == RFmode - && TYPE_MODE (fromtype) != RFmode) - return N_("invalid conversion to %<__fpreg%>"); - return NULL; -} - -/* Return the diagnostic message string if the unary operation OP is - not permitted on TYPE, NULL otherwise. */ -static const char * -ia64_invalid_unary_op (int op, tree type) -{ - /* Reject operations on __fpreg other than unary + or &. 
*/ - if (TYPE_MODE (type) == RFmode - && op != CONVERT_EXPR - && op != ADDR_EXPR) - return N_("invalid operation on %<__fpreg%>"); - return NULL; -} - -/* Return the diagnostic message string if the binary operation OP is - not permitted on TYPE1 and TYPE2, NULL otherwise. */ -static const char * -ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2) -{ - /* Reject operations on __fpreg. */ - if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode) - return N_("invalid operation on %<__fpreg%>"); - return NULL; -} - -/* Implement overriding of the optimization options. */ -void -ia64_optimization_options (int level ATTRIBUTE_UNUSED, - int size ATTRIBUTE_UNUSED) -{ - /* Let the scheduler form additional regions. */ - set_param_value ("max-sched-extend-regions-iters", 2); -} - -#include "gt-ia64.h"
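From the user's side, the three diagnostic hooks above make __fpreg nearly opaque. A sketch of what an IA-64-targeted translation unit may and may not do (the commented-out lines show the rejected forms):

__fpreg f;                  /* declaration is fine */
__fpreg *pf = &f;           /* unary & is allowed, as is unary + */
/* double d = (double) f;      rejected: invalid conversion from __fpreg */
/* __fpreg sum = f + f;        rejected: invalid operation on __fpreg */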