diff --git a/gcc/common/config/nds32/nds32-common.c b/gcc/common/config/nds32/nds32-common.c
index dbcc390728e..48236605118 100644
--- a/gcc/common/config/nds32/nds32-common.c
+++ b/gcc/common/config/nds32/nds32-common.c
@@ -53,6 +53,16 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
 
       return true;
 
+    case OPT_misr_secure_:
+      /* Check the valid security level: 0 1 2 3.  */
+      if (value < 0 || value > 3)
+	{
+	  error_at (loc, "for the option -misr-secure=X, the valid X "
+			 "must be: 0, 1, 2, or 3");
+	  return false;
+	}
+      return true;
+
     case OPT_mcache_block_size_:
       /* Check valid value: 4 8 16 32 64 128 256 512.  */
       if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
@@ -74,12 +84,19 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options nds32_option_optimization_table[] =
 {
+#if TARGET_LINUX_ABI == 0
+  /* Disable -fdelete-null-pointer-checks by default in ELF toolchain.  */
+  { OPT_LEVELS_ALL,               OPT_fdelete_null_pointer_checks,
+							   NULL, 0 },
+#endif
   /* Enable -fsched-pressure by default at -O1 and above.  */
   { OPT_LEVELS_1_PLUS,            OPT_fsched_pressure,     NULL, 1 },
   /* Enable -fomit-frame-pointer by default at all optimization levels.  */
   { OPT_LEVELS_ALL,               OPT_fomit_frame_pointer, NULL, 1 },
   /* Enable -mrelax-hint by default at all optimization levels.  */
   { OPT_LEVELS_ALL,               OPT_mrelax_hint,         NULL, 1 },
+  /* Enable -malways-align by default at -O1 and above, but not -Os or -Og.  */
+  { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align,       NULL, 1 },
   /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
   { OPT_LEVELS_SIZE,              OPT_mv3push,             NULL, 1 },
 
@@ -87,6 +104,19 @@ static const struct default_options nds32_option_optimization_table[] =
 };
 
 /* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO.
+   Return UI_DWARF2 when TARGET_LINUX_ABI is nonzero and UI_SJLJ
+   otherwise.  OPTS is not consulted.  */
+static enum unwind_info_type
+nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+  /* The choice depends only on which ABI header defined the macro.  */
+  return TARGET_LINUX_ABI ? UI_DWARF2 : UI_SJLJ;
+}
+
+/* ------------------------------------------------------------------------ */
+
 
 /* Run-time Target Specification.  */
 
@@ -103,6 +133,7 @@ static const struct default_options nds32_option_optimization_table[] =
      TARGET_EXT_PERF   : Generate performance extention instrcution.
      TARGET_EXT_PERF2  : Generate performance extention version 2 instrcution.
      TARGET_EXT_STRING : Generate string extention instrcution.
+     TARGET_HW_ABS     : Generate hardware abs instruction.
      TARGET_CMOV       : Generate conditional move instruction.  */
 #undef TARGET_DEFAULT_TARGET_FLAGS
 #define TARGET_DEFAULT_TARGET_FLAGS		\
@@ -113,6 +144,7 @@ static const struct default_options nds32_option_optimization_table[] =
    | MASK_EXT_PERF				\
    | MASK_EXT_PERF2				\
    | MASK_EXT_STRING				\
+   | MASK_HW_ABS				\
    | MASK_CMOV)
 
 #undef TARGET_HANDLE_OPTION
@@ -125,7 +157,7 @@ static const struct default_options nds32_option_optimization_table[] =
 /* Defining the Output Assembler Language.  */
 
 #undef TARGET_EXCEPT_UNWIND_INFO
-#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info
 
 /* ------------------------------------------------------------------------ */
 
diff --git a/gcc/config.gcc b/gcc/config.gcc
index e58494c1c17..5ccf9e4ec72 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -444,7 +444,17 @@ mips*-*-*)
 	;;
 nds32*)
 	cpu_type=nds32
-	extra_headers="nds32_intrinsic.h"
+	extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc"
+	case ${target} in
+	  nds32*-*-linux*)
+	    extra_options="${extra_options} nds32/nds32-linux.opt"
+	    ;;
+	  nds32*-*-elf*)
+	    extra_options="${extra_options} nds32/nds32-elf.opt"
+	    ;;
+	  *)
+	    ;;
+	esac
 	extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o nds32-relax-opt.o nds32-utils.o"
 	;;
 nios2-*-*)
@@ -2332,17 +2342,36 @@ msp430*-*-*)
 	tmake_file="${tmake_file} msp430/t-msp430"
 	extra_gcc_objs="driver-msp430.o"
 	;;
-nds32le-*-*)
+nds32*-*-*)
 	target_cpu_default="0"
 	tm_defines="${tm_defines}"
-	tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h"
-	tmake_file="nds32/t-nds32 nds32/t-mlibs"
-	;;
-nds32be-*-*)
-	target_cpu_default="0|MASK_BIG_ENDIAN"
-	tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
-	tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h"
-	tmake_file="nds32/t-nds32 nds32/t-mlibs"
+	case ${target} in
+	  nds32le*-*-*)
+	    ;;
+	  nds32be-*-*)
+	    target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN"
+	    tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
+	    ;;
+	esac
+	case ${target} in
+	  nds32*-*-elf*)
+	    tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h"
+	    tmake_file="nds32/t-nds32 nds32/t-elf"
+	    ;;
+	  nds32*-*-linux*)
+	    tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h"
+	    tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux"
+	    ;;
+	esac
+
+	# Handle --enable-default-relax setting.
+	if test x${enable_default_relax} = xyes; then
+		tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1"
+	fi
+	# Handle --with-ext-dsp
+	if test x${with_ext_dsp} = xyes; then
+		tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1"
+	fi
 	;;
 nios2-*-*)
 	tm_file="elfos.h ${tm_file}"
@@ -4315,11 +4344,11 @@ case "${target}" in
 		"")
 			with_cpu=n9
 			;;
-		n6 | n7 | n8 | e8 | s8 | n9)
+		n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10 | n12 | n13 | n15)
 			# OK
 			;;
 		*)
-			echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2
+			echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10 n12 n13 n15" 1>&2
 			exit 1
 			;;
 		esac
@@ -4329,15 +4358,30 @@ case "${target}" in
 		"")
 			# the default library is newlib
 			with_nds32_lib=newlib
+			tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1"
 			;;
 		newlib)
 			# OK
+			tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1"
 			;;
 		mculib)
 			# OK
+			# for the arch=v3f or arch=v3s under mculib toolchain,
+			# we would like to set -fno-math-errno as default
+			case "${with_arch}" in
+			v3f | v3s)
+				tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1"
+				;;
+			esac
+			;;
+		glibc)
+			# OK
+			tm_defines="${tm_defines}"
+			;;
+		uclibc)
 			;;
 		*)
-			echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2
+			echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2
 			exit 1
 			;;
 		esac
diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md
index 37c27049ef0..6d42f50c882 100644
--- a/gcc/config/nds32/constants.md
+++ b/gcc/config/nds32/constants.md
@@ -23,6 +23,7 @@
 (define_constants
   [(R8_REGNUM  8)
    (TA_REGNUM 15)
+   (TP_REGNUM 25)
    (FP_REGNUM 28)
    (GP_REGNUM 29)
    (LP_REGNUM 30)
@@ -49,6 +50,16 @@
   UNSPEC_FFB
   UNSPEC_FFMISM
   UNSPEC_FLMISM
+  UNSPEC_KDMBB
+  UNSPEC_KDMBT
+  UNSPEC_KDMTB
+  UNSPEC_KDMTT
+  UNSPEC_KHMBB
+  UNSPEC_KHMBT
+  UNSPEC_KHMTB
+  UNSPEC_KHMTT
+  UNSPEC_KSLRAW
+  UNSPEC_KSLRAWU
   UNSPEC_SVA
   UNSPEC_SVS
   UNSPEC_WSBH
@@ -62,6 +73,29 @@
   UNSPEC_UASTORE_HW
   UNSPEC_UASTORE_W
   UNSPEC_UASTORE_DW
+  UNSPEC_GOTINIT
+  UNSPEC_GOT
+  UNSPEC_GOTOFF
+  UNSPEC_PLT
+  UNSPEC_TLSGD
+  UNSPEC_TLSLD
+  UNSPEC_TLSIE
+  UNSPEC_TLSLE
+  UNSPEC_ROUND
+  UNSPEC_VEC_COMPARE
+  UNSPEC_KHM
+  UNSPEC_KHMX
+  UNSPEC_CLIP_OV
+  UNSPEC_CLIPS_OV
+  UNSPEC_BITREV
+  UNSPEC_KABS
+  UNSPEC_LOOP_END
+  UNSPEC_TLS_DESC
+  UNSPEC_TLS_IE
+  UNSPEC_ADD32
+  UNSPEC_ICT
+  UNSPEC_KADDH
+  UNSPEC_KSUBH
 ])
 
 ;; The unspec_volatile operation index.
@@ -135,10 +169,14 @@
   UNSPEC_VOLATILE_SET_TRIG_EDGE
   UNSPEC_VOLATILE_GET_TRIG_TYPE
   UNSPEC_VOLATILE_RELAX_GROUP
+  UNSPEC_VOLATILE_OMIT_FP_BEGIN
+  UNSPEC_VOLATILE_OMIT_FP_END
   UNSPEC_VOLATILE_POP25_RETURN
   UNSPEC_VOLATILE_UNALIGNED_FEATURE
   UNSPEC_VOLATILE_ENABLE_UNALIGNED
   UNSPEC_VOLATILE_DISABLE_UNALIGNED
+  UNSPEC_VOLATILE_RDOV
+  UNSPEC_VOLATILE_CLROV
 ])
 
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md
index 7af7769fcbf..315c60313e5 100644
--- a/gcc/config/nds32/constraints.md
+++ b/gcc/config/nds32/constraints.md
@@ -127,6 +127,11 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, -31, 0)")))
 
+(define_constraint "Iu06"
+  "Unsigned immediate 6-bit value"
+  (and (match_code "const_int")
+       (match_test "ival < (1 << 6) && ival >= 0")))
+
 ;; Ip05 is special and dedicated for v3 movpi45 instruction.
 ;; movpi45 has imm5u field but the range is 16 ~ 47.
 (define_constraint "Ip05"
@@ -136,10 +141,10 @@
 		    && ival >= (0 + 16)
 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
 
-(define_constraint "Iu06"
+(define_constraint "IU06"
   "Unsigned immediate 6-bit value constraint for addri36.sp instruction"
   (and (match_code "const_int")
-       (match_test "ival < (1 << 6)
+       (match_test "ival < (1 << 8)
 		    && ival >= 0
 		    && (ival % 4 == 0)
 		    && (TARGET_ISA_V3 || TARGET_ISA_V3M)")))
@@ -302,6 +307,25 @@
        (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M)
 		    && (IN_RANGE (exact_log2 (ival + 1), 1, 8))")))
 
+(define_constraint "CVp5"
+  "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVp5_p (op)")))
+
+(define_constraint "CVs5"
+  "Signed immediate 5-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs5_p (op)")))
+
+(define_constraint "CVs2"
+  "Signed immediate 20-bit value"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVs2_p (op)")))
+
+(define_constraint "CVhi"
+  "The immediate value that can be simply set high 20-bit"
+  (and (match_code "const_vector")
+       (match_test "nds32_valid_CVhi_p (op)")))
 
 (define_memory_constraint "U33"
   "Memory constraint for 333 format"
@@ -349,4 +373,9 @@
        (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
 		     && nds32_float_mem_operand_p (op)")))
 
+(define_constraint "S"
+  "@internal
+   A constant call address."
+  (match_operand 0 "nds32_symbolic_operand"))
+
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h
new file mode 100644
index 00000000000..66397ac2e30
--- /dev/null
+++ b/gcc/config/nds32/elf.h
@@ -0,0 +1,81 @@
+/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* ------------------------------------------------------------------------ */
+
+#define TARGET_LINUX_ABI 0
+
+/* In the configure stage we may use options --enable-default-relax,
+   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
+   the default spec of passing --relax, --mifc, and --mex9 to linker.
+   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
+   so that we can customize them conveniently.  */
+#define LINK_SPEC \
+  " %{G*}" \
+  " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+  " %{shared:-shared}" \
+  NDS32_RELAX_SPEC
+
+#define LIB_SPEC \
+  " -lc -lgloss"
+
+#define LIBGCC_SPEC \
+  " -lgcc"
+
+/* The option -mno-ctor-dtor can disable constructor/destructor feature
+   by applying different crt stuff.  In the convention, crt0.o is the
+   startup file without constructor/destructor;
+   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
+   startup files with constructor/destructor.
+   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
+   by newlib/mculib/glibc/uclibc, while crtbegin.o and crtend.o are
+   currently provided by GCC for nds32 target.
+
+   For nds32 target so far:
+   If -mno-ctor-dtor, we are going to link
+   "crt0.o [user objects]".
+   If -mctor-dtor, we are going to link
+   "crt1.o crtbegin1.o [user objects] crtend1.o".
+
+   Note that the TARGET_DEFAULT_CTOR_DTOR would affect the
+   default behavior.  Check gcc/config.gcc for more information.  */
+#ifdef TARGET_DEFAULT_CTOR_DTOR
+  #define STARTFILE_SPEC \
+    " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+    " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+    " %{mcrt-arg:crtarg.o%s}"
+  #define ENDFILE_SPEC \
+    " %{!mno-ctor-dtor:crtend1.o%s}"
+#else
+  #define STARTFILE_SPEC \
+    " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \
+    " %{mctor-dtor|coverage:crtbegin1.o%s}" \
+    " %{mcrt-arg:crtarg.o%s}"
+  #define ENDFILE_SPEC \
+    " %{mctor-dtor|coverage:crtend1.o%s}"
+#endif
+
+#define STARTFILE_CXX_SPEC \
+  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
+  " %{!mno-ctor-dtor:crtbegin1.o%s}" \
+  " %{mcrt-arg:crtarg.o%s}"
+#define ENDFILE_CXX_SPEC \
+  " %{!mno-ctor-dtor:crtend1.o%s}"
diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md
index c2062de2e97..f4fb58181b1 100644
--- a/gcc/config/nds32/iterators.md
+++ b/gcc/config/nds32/iterators.md
@@ -68,6 +68,28 @@
 ;; shifts
 (define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert])
 
+(define_code_iterator shifts [ashift ashiftrt lshiftrt])
+
+(define_code_iterator shiftrt [ashiftrt lshiftrt])
+
+(define_code_iterator sat_plus [ss_plus us_plus])
+
+(define_code_iterator all_plus [plus ss_plus us_plus])
+
+(define_code_iterator sat_minus [ss_minus us_minus])
+
+(define_code_iterator all_minus [minus ss_minus us_minus])
+
+(define_code_iterator plus_minus [plus minus])
+
+(define_code_iterator extend [sign_extend zero_extend])
+
+(define_code_iterator sumax [smax umax])
+
+(define_code_iterator sumin [smin umin])
+
+(define_code_iterator sumin_max [smax umax smin umin])
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes.
 ;;----------------------------------------------------------------------------
@@ -76,5 +98,23 @@
 (define_code_attr shift
   [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")])
 
+(define_code_attr su
+  [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")])
+
+(define_code_attr zs
+  [(sign_extend "s") (zero_extend "z")])
+
+(define_code_attr uk
+  [(plus "") (ss_plus "k") (us_plus "uk")
+   (minus "") (ss_minus "k") (us_minus "uk")])
+
+(define_code_attr opcode
+  [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")])
+
+(define_code_attr add_rsub
+  [(plus "a") (minus "rs")])
+
+(define_code_attr add_sub
+  [(plus "a") (minus "s")])
 
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/nds32/linux.h b/gcc/config/nds32/linux.h
new file mode 100644
index 00000000000..f66f9076baf
--- /dev/null
+++ b/gcc/config/nds32/linux.h
@@ -0,0 +1,86 @@
+/* Definitions of target machine of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2014 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* ------------------------------------------------------------------------ */
+
+#define TARGET_LINUX_ABI 1
+
+#undef  SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef  PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#define TARGET_OS_CPP_BUILTINS()                \
+  do                                            \
+    {                                           \
+      GNU_USER_TARGET_OS_CPP_BUILTINS();           \
+    }                                           \
+  while (0)
+
+#ifdef TARGET_BIG_ENDIAN_DEFAULT
+#define LD_SO_ENDIAN_SPEC "%{mlittle-endian:le}%{!mlittle-endian:be}"
+#else
+#define LD_SO_ENDIAN_SPEC "%{mbig-endian:be}%{!mbig-endian:le}"
+#endif
+
+/* Record arch version in TARGET_ARCH_DEFAULT.  0 means soft ABI,
+   1 means hard ABI and using full floating-point instruction,
+   2 means hard ABI and only using single-precision floating-point
+   instruction.  */
+#if TARGET_ARCH_DEFAULT
+#define LD_SO_ABI_SPEC "%{!mabi=2:f}"
+#else
+#define LD_SO_ABI_SPEC "%{mabi=2fp+:f}"
+#endif
+
+#define GLIBC_DYNAMIC_LINKER \
+  "/lib/ld-linux-nds32" LD_SO_ENDIAN_SPEC LD_SO_ABI_SPEC ".so.1"
+
+/* In the configure stage we may use options --enable-default-relax,
+   --enable-Os-default-ifc and --enable-Os-default-ex9.  They affect
+   the default spec of passing --relax, --mifc, and --mex9 to linker.
+   We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC
+   so that we can customize them conveniently.  */
+#define LINK_SPEC \
+ " %{G*}" \
+ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+ " %{shared:-shared} \
+  %{!shared: \
+    %{!static: \
+      %{rdynamic:-export-dynamic} \
+      -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \
+    %{static:-static}}" \
+  NDS32_RELAX_SPEC
+
+#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} "
+
+#define CPP_SPEC "%{pthread:-D_REENTRANT}"
+
+/* The SYNC operations are implemented as library functions, not
+   INSN patterns.  As a result, the HAVE defines for the patterns are
+   not defined.  We need to define them to generate the corresponding
+   __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE
+   defines.
+   Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html  */
+#define HAVE_sync_compare_and_swapqi 1
+#define HAVE_sync_compare_and_swaphi 1
+#define HAVE_sync_compare_and_swapsi 1
diff --git a/gcc/config/nds32/nds32-cost.c b/gcc/config/nds32/nds32-cost.c
index 8d01e8afee2..979000fcc45 100644
--- a/gcc/config/nds32/nds32-cost.c
+++ b/gcc/config/nds32/nds32-cost.c
@@ -34,66 +34,379 @@
 #include "optabs.h"		/* For GEN_FCN.  */
 #include "recog.h"
 #include "tm-constrs.h"
+#include "tree-pass.h"
 
 /* ------------------------------------------------------------------------ */
 
-bool
-nds32_rtx_costs_impl (rtx x,
-		      machine_mode mode ATTRIBUTE_UNUSED,
-		      int outer_code,
-		      int opno ATTRIBUTE_UNUSED,
-		      int *total,
-		      bool speed)
-{
-  int code = GET_CODE (x);
+typedef bool (*rtx_cost_func) (rtx, int, int, int, int*);
 
-  /* According to 'speed', goto suitable cost model section.  */
-  if (speed)
-    goto performance_cost;
-  else
-    goto size_cost;
+struct rtx_cost_model_t {
+  rtx_cost_func speed_prefer;
+  rtx_cost_func size_prefer;
+};
 
+static rtx_cost_model_t rtx_cost_model;
 
-performance_cost:
-  /* This is section for performance cost model.  */
+static int insn_size_16bit; /* Initial at nds32_init_rtx_costs.  */
+static const int insn_size_32bit = 4;
+
+static bool
+nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED,
+			      int code,
+			      int outer_code ATTRIBUTE_UNUSED,
+			      int opno ATTRIBUTE_UNUSED,
+			      int *total)
+{
+  rtx op0;
+  rtx op1;
+  machine_mode mode = GET_MODE (x);
+  /* Scale cost by mode size.  */
+  int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
 
-  /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
-     We treat it as 4-cycle cost for each instruction
-     under performance consideration.  */
   switch (code)
     {
-    case SET:
-      /* For 'SET' rtx, we need to return false
-         so that it can recursively calculate costs.  */
-      return false;
-
     case USE:
       /* Used in combine.c as a marker.  */
       *total = 0;
-      break;
+      return true;
+
+    case CONST_INT:
+      /* When not optimizing for size, we care more about the cost
+	 of hot code, and hot code is often in a loop.  If a constant
+	 operand needs to be forced into a register, we will often be
+	 able to hoist the constant load out of the loop, so the load
+	 should not contribute to the cost.  */
+      if (outer_code == SET || outer_code == PLUS)
+	*total = satisfies_constraint_Is20 (x) ? 0 : 4;
+      else if (outer_code == AND || outer_code == IOR || outer_code == XOR
+	       || outer_code == MINUS)
+	*total = satisfies_constraint_Iu15 (x) ? 0 : 4;
+      else if (outer_code == ASHIFT || outer_code == ASHIFTRT
+	       || outer_code == LSHIFTRT)
+	*total = satisfies_constraint_Iu05 (x) ? 0 : 4;
+      else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
+	       || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
+	*total = satisfies_constraint_Is16 (x) ? 0 : 4;
+      else
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case CONST:
+    case LO_SUM:
+    case HIGH:
+    case SYMBOL_REF:
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case MEM:
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case SET:
+      op0 = SET_DEST (x);
+      op1 = SET_SRC (x);
+      mode = GET_MODE (op0);
+      /* Scale cost by mode size.  */
+      cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
+
+      switch (GET_CODE (op1))
+	{
+	case REG:
+	case SUBREG:
+	  /* Register move and Store instructions.  */
+	  if ((REG_P (op0) || MEM_P (op0))
+	      && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = cost;
+	  return true;
+
+	case MEM:
+	  /* Load instructions.  */
+	  if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    *total = cost;
+	  return true;
+
+	case CONST_INT:
+	  /* movi instruction.  */
+	  if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
+	    {
+	      if (satisfies_constraint_Is20 (op1))
+		*total = COSTS_N_INSNS (1) - 1;
+	      else
+		*total = COSTS_N_INSNS (2);
+	    }
+	  else
+	    *total = cost;
+	  return true;
+
+	case CONST:
+	case SYMBOL_REF:
+	case LABEL_REF:
+	  /* la instruction.  */
+	  if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
+	    *total = COSTS_N_INSNS (1) - 1;
+	  else
+	    *total = cost;
+	  return true;
+	case VEC_SELECT:
+	  *total = cost;
+	  return true;
+
+	default:
+	  *total = cost;
+	  return true;
+	}
+
+    case PLUS:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
+	       || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
+	/* ALU_SHIFT */
+	*total = COSTS_N_INSNS (2);
+
+      else if ((GET_CODE (op1) == CONST_INT
+		&& satisfies_constraint_Is15 (op1))
+		|| REG_P (op1))
+	/* ADD instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* ADD instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case MINUS:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
+	       || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
+	/* ALU_SHIFT */
+	*total = COSTS_N_INSNS (2);
+      else if ((GET_CODE (op0) == CONST_INT
+		&& satisfies_constraint_Is15 (op0))
+		|| REG_P (op0))
+	/* SUB instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SUB instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case TRUNCATE:
+      /* TRUNCATE and AND behavior is same. */
+      *total = COSTS_N_INSNS (1);
+      return true;
+
+    case AND:
+    case IOR:
+    case XOR:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      /* Under the DSP extension prefer (and (ior) (ior)) over
+	 (ior (and) (and)) to synthesize pk** and insb instructions.  */
+      if (NDS32_EXT_DSP_P ()
+	  && GET_CODE (op0) == GET_CODE (op1)
+	  && ((code == AND && GET_CODE (op0) == IOR)
+	      || (code == IOR && GET_CODE (op0) == AND)))
+	{
+	  *total = COSTS_N_INSNS (code == AND ? 1 : 10);
+	  return true;
+	}
+
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT)
+	*total = COSTS_N_INSNS (2);
+      else if ((GET_CODE (op1) == CONST_INT
+	       && satisfies_constraint_Iu15 (op1))
+	       || REG_P (op1))
+	/* AND, OR, XOR instructions */
+	*total = COSTS_N_INSNS (1);
+      else if (code == AND || GET_CODE (op0) == NOT)
+	/* BITC instruction */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* AND, OR, XOR instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
 
     case MULT:
+      if (GET_MODE (x) == DImode
+	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
+	  || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+	/* MUL instructions */
+	*total = COSTS_N_INSNS (1);
+      else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (outer_code == PLUS || outer_code == MINUS)
+	*total = COSTS_N_INSNS (2);
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* MUL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* MUL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+
+      if (TARGET_MUL_SLOW)
+	*total += COSTS_N_INSNS (4);
+
+      return true;
+
+    case LSHIFTRT:
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (outer_code == PLUS || outer_code == MINUS
+	       || outer_code == AND || outer_code == IOR
+	       || outer_code == XOR)
+	*total = COSTS_N_INSNS (2);
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* SRL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SRL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case ASHIFT:
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if (outer_code == AND || outer_code == IOR
+	       || outer_code == XOR)
+	*total = COSTS_N_INSNS (2);
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* SLL instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SLL instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case ASHIFTRT:
+    case ROTATERT:
+      if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
+	*total = cost;
+      else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	       && satisfies_constraint_Iu05 (XEXP (x, 1)))
+	       || REG_P (XEXP (x, 1)))
+	/* SRA, ROTR instructions */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* SRA, ROTR instructions: IMM out of range.  */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case LT:
+    case LTU:
+      if (outer_code == SET)
+	{
+	  if ((GET_CODE (XEXP (x, 1)) == CONST_INT
+	      && satisfies_constraint_Iu15 (XEXP (x, 1)))
+	      || REG_P (XEXP (x, 1)))
+	    /* SLT, SLTI instructions */
+	    *total = COSTS_N_INSNS (1);
+	  else
+	    /* SLT, SLTI instructions: IMM out of range.  */
+	    *total = COSTS_N_INSNS (2);
+	}
+      else
+	/* branch */
+	*total = COSTS_N_INSNS (2);
+      return true;
+
+    case EQ:
+    case NE:
+    case GE:
+    case LE:
+    case GT:
+      /* branch */
+      *total = COSTS_N_INSNS (2);
+      return true;
+
+    case IF_THEN_ELSE:
+      if (GET_CODE (XEXP (x, 1)) == LABEL_REF)
+	/* branch */
+	*total = COSTS_N_INSNS (2);
+      else
+	/* cmovz, cmovn instructions */
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case LABEL_REF:
+      if (outer_code == IF_THEN_ELSE)
+	/* branch */
+	*total = COSTS_N_INSNS (2);
+      else
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case ZERO_EXTEND:
+    case SIGN_EXTEND:
+      if (MEM_P (XEXP (x, 0)))
+	/* Using memory access. */
+	*total = COSTS_N_INSNS (1);
+      else
+	/* Zero extend and sign extend instructions.  */
+	*total = COSTS_N_INSNS (1);
+      return true;
+
+    case NEG:
+    case NOT:
       *total = COSTS_N_INSNS (1);
-      break;
+      return true;
 
     case DIV:
     case UDIV:
     case MOD:
     case UMOD:
-      *total = COSTS_N_INSNS (7);
-      break;
+      *total = COSTS_N_INSNS (20);
+      return true;
 
-    default:
+    case CALL:
+      *total = COSTS_N_INSNS (2);
+      return true;
+
+    case CLZ:
+    case SMIN:
+    case SMAX:
+    case ZERO_EXTRACT:
+      if (TARGET_EXT_PERF)
+	*total = COSTS_N_INSNS (1);
+      else
+	*total = COSTS_N_INSNS (3);
+      return true;
+    case VEC_SELECT:
       *total = COSTS_N_INSNS (1);
-      break;
-    }
-
-  return true;
+      return true;
 
+    default:
+      *total = COSTS_N_INSNS (3);
+      return true;
+    }
+}
 
-size_cost:
-  /* This is section for size cost model.  */
-
+static bool
+nds32_rtx_costs_size_prefer (rtx x,
+			     int code,
+			     int outer_code,
+			     int opno ATTRIBUTE_UNUSED,
+			     int *total)
+{
   /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
      We treat it as 4-byte cost for each instruction
      under code size consideration.  */
@@ -118,85 +431,162 @@ nds32_rtx_costs_impl (rtx x,
 	     (set X imm20s), use movi, 4-byte cost.
 	     (set X BIG_INT), use sethi/ori, 8-byte cost.  */
 	  if (satisfies_constraint_Is05 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else if (satisfies_constraint_Is20 (x))
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	  else
-	    *total = COSTS_N_INSNS (2);
+	    *total = insn_size_32bit * 2;
 	}
       else if (outer_code == PLUS || outer_code == MINUS)
 	{
 	  /* Possible addi333/subi333 or subi45/addi45, 2-byte cost.
 	     General case, cost 1 instruction with 4-byte.  */
 	  if (satisfies_constraint_Iu05 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	}
       else if (outer_code == ASHIFT)
 	{
 	  /* Possible slli333, 2-byte cost.
 	     General case, cost 1 instruction with 4-byte.  */
 	  if (satisfies_constraint_Iu03 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	}
       else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT)
 	{
 	  /* Possible srai45 or srli45, 2-byte cost.
 	     General case, cost 1 instruction with 4-byte.  */
 	  if (satisfies_constraint_Iu05 (x))
-	    *total = COSTS_N_INSNS (1) - 2;
+	    *total = insn_size_16bit;
 	  else
-	    *total = COSTS_N_INSNS (1);
+	    *total = insn_size_32bit;
 	}
       else
 	{
 	  /* For other cases, simply set it 4-byte cost.  */
-	  *total = COSTS_N_INSNS (1);
+	  *total = insn_size_32bit;
 	}
       break;
 
     case CONST_DOUBLE:
       /* It requires high part and low part processing, set it 8-byte cost.  */
-      *total = COSTS_N_INSNS (2);
+      *total = insn_size_32bit * 2;
+      break;
+
+    case CONST:
+    case SYMBOL_REF:
+      *total = insn_size_32bit * 2;
       break;
 
     default:
       /* For other cases, generally we set it 4-byte cost
-         and stop resurively traversing.  */
-      *total = COSTS_N_INSNS (1);
+	 and stop recursively traversing.  */
+      *total = insn_size_32bit;
       break;
     }
 
   return true;
 }
 
-int
-nds32_address_cost_impl (rtx address,
-			 machine_mode mode ATTRIBUTE_UNUSED,
-			 addr_space_t as ATTRIBUTE_UNUSED,
-			 bool speed)
+/* Register the speed/size RTX cost callbacks in rtx_cost_model and set
+   insn_size_16bit, the size charged for an insn with a 16-bit form.  */
+void
+nds32_init_rtx_costs (void)
+{
+  rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer;
+  rtx_cost_model.size_prefer  = nds32_rtx_costs_size_prefer;
+
+  /* Charge 2 bytes only when TARGET_16_BIT is on; otherwise charge 4.  */
+  insn_size_16bit = TARGET_16_BIT ? 2 : 4;
+}
+
+/* This target hook describes the relative costs of RTL expressions.
+   Return 'true' when all subexpressions of x have been processed.
+   Return 'false' to sum the costs of sub-rtx, plus cost of this operation.
+   Refer to gcc/rtlanal.c for more information.  */
+bool
+nds32_rtx_costs_impl (rtx x,
+		      machine_mode mode ATTRIBUTE_UNUSED,
+		      int outer_code,
+		      int opno,
+		      int *total,
+		      bool speed)
+{
+  int code = GET_CODE (x);
+
+  /* According to 'speed', dispatch to the matching cost model callback.  */
+  if (speed)
+    return rtx_cost_model.speed_prefer (x, code, outer_code, opno, total);
+  else
+    return rtx_cost_model.size_prefer (x, code, outer_code, opno, total);
+}
+
+/* Address cost model used when optimizing for speed outside fwprop.  */
+int nds32_address_cost_speed_prefer (rtx address)
 {
   rtx plus0, plus1;
   enum rtx_code code;
 
   code = GET_CODE (address);
 
-  /* According to 'speed', goto suitable cost model section.  */
-  if (speed)
-    goto performance_cost;
-  else
-    goto size_cost;
+  switch (code)
+    {
+    case POST_MODIFY:
+    case POST_INC:
+    case POST_DEC:
+      /* Encourage POST_MODIFY/POST_INC/POST_DEC addressing
+	 by costing it below one full instruction.  */
+      return COSTS_N_INSNS (1) - 2;
+
+    case SYMBOL_REF:
+      /* We can have gp-relative load/store for symbol_ref.
+	 Cost it as two instructions.  */
+      return COSTS_N_INSNS (2);
+
+    case CONST:
+      /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
+	 Cost it as two instructions.  */
+      return COSTS_N_INSNS (2);
+
+    case REG:
+      /* A plain register address costs less than one full instruction.  */
+      return COSTS_N_INSNS (1) - 2;
+
+    case PLUS:
+      /* We do not need to check if the address is a legitimate address,
+	 because this hook is never called with an invalid address.
+	 Unlike the size model, the const_int range is not examined here;
+	 only the shape of the two operands decides the cost.  */
+      plus0 = XEXP (address, 0);
+      plus1 = XEXP (address, 1);
+
+      if (REG_P (plus0) && CONST_INT_P (plus1))
+	return COSTS_N_INSNS (1) - 2;
+      else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
+	return COSTS_N_INSNS (1) - 1;
+      else if (REG_P (plus0) && REG_P (plus1))
+	return COSTS_N_INSNS (1);
+
+      /* For any other 'plus' form, cost it as one full instruction.  */
+      return COSTS_N_INSNS (1);
+
+    default:
+      break;
+    }
+
+  return COSTS_N_INSNS (4);
 
-performance_cost:
-  /* This is section for performance cost model.  */
+}
 
-  /* FALLTHRU, currently we use same cost model as size_cost.  */
+int nds32_address_cost_speed_fwprop (rtx address)
+{
+  rtx plus0, plus1;
+  enum rtx_code code;
 
-size_cost:
-  /* This is section for size cost model.  */
+  code = GET_CODE (address);
 
   switch (code)
     {
@@ -210,12 +600,12 @@ nds32_address_cost_impl (rtx address,
     case SYMBOL_REF:
       /* We can have gp-relative load/store for symbol_ref.
 	 Have it 4-byte cost.  */
-      return COSTS_N_INSNS (1);
+      return COSTS_N_INSNS (2);
 
     case CONST:
       /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
 	 Have it 4-byte cost.  */
-      return COSTS_N_INSNS (1);
+      return COSTS_N_INSNS (2);
 
     case REG:
       /* Simply return 4-byte costs.  */
@@ -233,11 +623,15 @@ nds32_address_cost_impl (rtx address,
 	{
 	  /* If it is possible to be lwi333/swi333 form,
 	     make it 2-byte cost.  */
-	  if (satisfies_constraint_Iu05 (plus1))
+	  if (satisfies_constraint_Iu03 (plus1))
 	    return (COSTS_N_INSNS (1) - 2);
 	  else
 	    return COSTS_N_INSNS (1);
 	}
+      if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
+	return COSTS_N_INSNS (1) - 2;
+      else if (REG_P (plus0) && REG_P (plus1))
+	return COSTS_N_INSNS (1);
 
       /* For other 'plus' situation, make it cost 4-byte.  */
       return COSTS_N_INSNS (1);
@@ -249,4 +643,84 @@ nds32_address_cost_impl (rtx address,
   return COSTS_N_INSNS (4);
 }
 
+/* Address cost model used when optimizing for size.  */
+int nds32_address_cost_size_prefer (rtx address)
+{
+  rtx plus0, plus1;
+  enum rtx_code code;
+
+  code = GET_CODE (address);
+
+  switch (code)
+    {
+    case POST_MODIFY:
+    case POST_INC:
+    case POST_DEC:
+      /* Encourage POST_MODIFY/POST_INC/POST_DEC addressing
+	 by giving it zero cost.  */
+      return 0;
+
+    case SYMBOL_REF:
+      /* We can have gp-relative load/store for symbol_ref.
+	 Cost it as two instructions.  */
+      return COSTS_N_INSNS (2);
+
+    case CONST:
+      /* It is supposed to be the pattern (const (plus symbol_ref const_int)).
+	 Cost it as two instructions.  */
+      return COSTS_N_INSNS (2);
+
+    case REG:
+      /* A plain register address costs slightly less than one insn.  */
+      return COSTS_N_INSNS (1) - 1;
+
+    case PLUS:
+      /* We do not need to check if the address is a legitimate address,
+	 because this hook is never called with an invalid address.
+	 But we had better check the range of the
+	 const_int value for cost, if it exists.  */
+      plus0 = XEXP (address, 0);
+      plus1 = XEXP (address, 1);
+
+      if (REG_P (plus0) && CONST_INT_P (plus1))
+	{
+	  /* If it can take the lwi333/swi333 form,
+	     make it 2-byte cost.  */
+	  if (satisfies_constraint_Iu03 (plus1))
+	    return (COSTS_N_INSNS (1) - 2);
+	  else
+	    return COSTS_N_INSNS (1) - 1;
+	}
+
+      /* E.g. (plus (reg) (mult (reg) (const))).  */
+      if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
+	return (COSTS_N_INSNS (1) - 1);
+
+      /* For any other 'plus' form, make it cost 4-byte.  */
+      return COSTS_N_INSNS (1);
+
+    default:
+      break;
+    }
+
+  return COSTS_N_INSNS (4);
+
+}
+
+/* Select the address cost model from SPEED_P and, when optimizing for
+   speed, from the pass that is currently running.  */
+int
+nds32_address_cost_impl (rtx address,
+			 machine_mode mode ATTRIBUTE_UNUSED,
+			 addr_space_t as ATTRIBUTE_UNUSED,
+			 bool speed_p)
+{
+  if (!speed_p)
+    return nds32_address_cost_size_prefer (address);
+  /* current_pass can be NULL when no pass is executing; guard the access.  */
+  if (current_pass && current_pass->tv_id == TV_FWPROP)
+    return nds32_address_cost_speed_fwprop (address);
+  return nds32_address_cost_speed_prefer (address);
+}
+
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md
index 7df715a771f..7ee6489d034 100644
--- a/gcc/config/nds32/nds32-doubleword.md
+++ b/gcc/config/nds32/nds32-doubleword.md
@@ -118,10 +118,28 @@
      ])
    (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
 
+;; Split a DIDF reg-reg move into two SImode moves if an operand is an odd GPR.
+(define_split
+  [(set (match_operand:DIDF 0 "register_operand" "")
+	(match_operand:DIDF 1 "register_operand" ""))]
+  "(NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
+    && ((REGNO (operands[0]) & 0x1) == 1))
+   || (NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
+       && ((REGNO (operands[1]) & 0x1) == 1))"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  {
+     operands[2] = gen_lowpart (SImode, operands[0]);
+     operands[4] = gen_highpart (SImode, operands[0]);
+     operands[3] = gen_lowpart (SImode, operands[1]);
+     operands[5] = gen_highpart (SImode, operands[1]);
+  }
+)
+
 (define_split
   [(set (match_operand:DIDF 0 "register_operand"     "")
 	(match_operand:DIDF 1 "const_double_operand" ""))]
-  "reload_completed"
+  "flag_pic || reload_completed"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 4) (match_dup 5))]
 {
diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md
new file mode 100644
index 00000000000..4c643a7528f
--- /dev/null
+++ b/gcc/config/nds32/nds32-dspext.md
@@ -0,0 +1,5278 @@
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+	(match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  /* If operands[1] is a large constant and cannot be performed
+     by a single instruction, we need to split it.  */
+  if (GET_CODE (operands[1]) == CONST_VECTOR
+      && !satisfies_constraint_CVs2 (operands[1])
+      && !satisfies_constraint_CVhi (operands[1]))
+    {
+      HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]);
+      rtx tmp_rtx;
+
+      tmp_rtx = can_create_pseudo_p ()
+		? gen_reg_rtx (SImode)
+		: simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+
+      emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode));
+      convert_move (operands[0], tmp_rtx, false);
+      DONE;
+    }
+
+  if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1]))
+    {
+      if (nds32_tls_referenced_p (operands [1]))
+	{
+	  nds32_expand_tls_move (operands);
+	  DONE;
+	}
+      else if (flag_pic)
+	{
+	  nds32_expand_pic_move (operands);
+	  DONE;
+	}
+    }
+})
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$  l,$  l,$  l,$  d,  d, r,$   d,    r,    r,    r, *f, *f,  r, *f,  Q")
+	(match_operand:VQIHI 1 "nds32_vmove_operand"  " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f,  r, *f,  Q, *f"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], <MODE>mode)
+       || register_operand(operands[1], <MODE>mode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov55\t%0, %1";
+    case 1:
+      return "ori\t%0, %1, 0";
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      return nds32_output_16bit_store (operands, <byte>);
+    case 6:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 7:
+    case 8:
+    case 9:
+    case 10:
+    case 11:
+      return nds32_output_16bit_load (operands, <byte>);
+    case 12:
+      return nds32_output_32bit_load (operands, <byte>);
+    case 13:
+      return "movpi45\t%0, %1";
+    case 14:
+      return "movi55\t%0, %1";
+    case 15:
+      return "movi\t%0, %1";
+    case 16:
+      return "sethi\t%0, hi20(%1)";
+    case 17:
+      if (TARGET_FPU_SINGLE)
+	return "fcpyss\t%0, %1, %1";
+      else
+	return "#";
+    case 18:
+      return "fmtsr\t%1, %0";
+    case 19:
+      return "fmfsr\t%0, %1";
+    case 20:
+      return nds32_output_float_load (operands);
+    case 21:
+      return nds32_output_float_store (operands);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore")
+   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   2,   4,  2,  2,  4,  4,   4,    4,    4,    4,     4")
+   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1, v3m,  v1, v1, v1, v1, v1, fpu,  fpu,  fpu,  fpu,   fpu")])
+
+(define_expand "movv2si"
+  [(set (match_operand:V2SI 0 "general_operand" "")
+	(match_operand:V2SI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Need to force register if mem <- !reg.  */
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (V2SImode, operands[1]);
+})
+
+(define_insn "*movv2si"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r,  r, r, Da, m, f, Q, f, r, f")
+	(match_operand:V2SI 1 "general_operand"      " r, i, Da, m,  r, r, Q, f, f, f, r"))]
+  "NDS32_EXT_DSP_P ()
+   && (register_operand(operands[0], V2SImode)
+       || register_operand(operands[1], V2SImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "movd44\t%0, %1";
+    case 1:
+      /* reg <- const_int, we ask gcc to split instruction.  */
+      return "#";
+    case 2:
+      /* The memory format is (mem (reg)),
+	 we can generate 'lmw.bi' instruction.  */
+      return nds32_output_double (operands, true);
+    case 3:
+      /* We haven't 64-bit load instruction,
+	 we split this pattern to two SImode pattern.  */
+      return "#";
+    case 4:
+      /* The memory format is (mem (reg)),
+	 we can generate 'smw.bi' instruction.  */
+      return nds32_output_double (operands, false);
+    case 5:
+      /* We haven't 64-bit store instruction,
+	 we split this pattern to two SImode pattern.  */
+      return "#";
+    case 6:
+      return nds32_output_float_load (operands);
+    case 7:
+      return nds32_output_float_store (operands);
+    case 8:
+      return "fcpysd\t%0, %1, %1";
+    case 9:
+      return "fmfdr\t%0, %1";
+    case 10:
+      return "fmtdr\t%1, %0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "!TARGET_16_BIT")
+		     (const_int 4)
+		     (const_int 2))
+       ;; Alternative 1
+       (const_int 16)
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 8)
+       ;; Alternative 4
+       (const_int 4)
+       ;; Alternative 5
+       (const_int 8)
+       ;; Alternative 6
+       (const_int 4)
+       ;; Alternative 7
+       (const_int 4)
+       ;; Alternative 8
+       (const_int 4)
+       ;; Alternative 9
+       (const_int 4)
+       ;; Alternative 10
+       (const_int 4)
+     ])
+   (set_attr "feature" " v1, v1,  v1,  v1,   v1,   v1,    fpu,    fpu,    fpu,    fpu,    fpu")])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:VQIHI 0 "general_operand" "")
+	(match_operand:VQIHI 1 "general_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx addr;
+  if (MEM_P (operands[0]) && !REG_P (operands[1]))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+
+  if (MEM_P (operands[0]))
+    {
+      addr = force_reg (Pmode, XEXP (operands[0], 0));
+      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
+    }
+  else
+    {
+      addr = force_reg (Pmode, XEXP (operands[1], 0));
+      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
+    }
+  DONE;
+})
+
+(define_expand "unaligned_load<mode>"
+  [(set (match_operand:VQIHI 0 "register_operand" "=r")
+	(unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_load (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1])));
+  DONE;
+})
+
+(define_insn "unaligned_load_w<mode>"
+  [(set (match_operand:VQIHI 0 "register_operand"                          "=  r")
+	(unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_lmw_single_word (operands);
+}
+  [(set_attr "type"   "load")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "unaligned_store<mode>"
+  [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r"))
+	(unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_ISA_V3M)
+    nds32_expand_unaligned_store (operands, <MODE>mode);
+  else
+    emit_insn (gen_unaligned_store_w<mode> (gen_rtx_MEM (<MODE>mode, operands[0]), operands[1]));
+  DONE;
+})
+
+(define_insn "unaligned_store_w<mode>"
+  [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand"      "=Umw")
+	(unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "   r")] UNSPEC_UASTORE_W))]
+  "NDS32_EXT_DSP_P ()"
+{
+  return nds32_output_smw_single_word (operands);
+}
+  [(set_attr "type"   "store")
+   (set_attr "length"     "4")]
+)
+
+(define_insn "<uk>add<mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"                 "=r")
+	(all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+			(match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>add<bits> %0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "<uk>adddi3"
+  [(set (match_operand:DI 0 "register_operand"              "=r")
+	(all_plus:DI (match_operand:DI 1 "register_operand" " r")
+		     (match_operand:DI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>add64 %0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "raddv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
+	(truncate:V4QI
+	  (ashiftrt:V4HI
+	    (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+		       (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+;; uradd8: zero-extend each byte lane, add, shift right by 1, then truncate.
+(define_insn "uraddv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                  "=r")
+	(truncate:V4QI
+	  (lshiftrt:V4HI
+	    (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+		       (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "raddv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
+	(truncate:V2HI
+	  (ashiftrt:V2SI
+	    (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+		       (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "uraddv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                  "=r")
+	(truncate:V2HI
+	  (lshiftrt:V2SI
+	    (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+		       (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "radddi3"
+  [(set (match_operand:DI 0 "register_operand"            "=r")
+	(truncate:DI
+	  (ashiftrt:TI
+	    (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		     (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "radd64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+;; uradd64: zero-extend both DI operands to TI, add, shift right by 1, truncate.
+(define_insn "uradddi3"
+  [(set (match_operand:DI 0 "register_operand"            "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		     (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "uradd64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "<uk>sub<mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"                  "=r")
+	(all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+			 (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>sub<bits> %0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "<uk>subdi3"
+  [(set (match_operand:DI 0 "register_operand"               "=r")
+	(all_minus:DI (match_operand:DI 1 "register_operand" " r")
+		      (match_operand:DI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<uk>sub64 %0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+(define_insn "rsubv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
+	(truncate:V4QI
+	  (ashiftrt:V4HI
+	    (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+			(sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "ursubv4qi3"
+  [(set (match_operand:V4QI 0 "register_operand"                                   "=r")
+	(truncate:V4QI
+	  (lshiftrt:V4HI
+	    (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r"))
+			(zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub8\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "rsubv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
+	(truncate:V2HI
+	  (ashiftrt:V2SI
+	    (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+			(sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "ursubv2hi3"
+  [(set (match_operand:V2HI 0 "register_operand"                                   "=r")
+	(truncate:V2HI
+	  (lshiftrt:V2SI
+	    (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r"))
+			(zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r")))
+	    (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "rsubdi3"
+  [(set (match_operand:DI 0 "register_operand"                   "=r")
+	(truncate:DI
+	  (ashiftrt:TI
+	    (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		      (sign_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rsub64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")])
+
+;; ursub64: zero-extend both DI operands to TI, subtract, shift right by 1.
+(define_insn "ursubdi3"
+  [(set (match_operand:DI 0 "register_operand"                   "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r"))
+		      (zero_extend:TI (match_operand:DI 2 "register_operand" " r")))
+	  (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "ursub64\t%0, %1, %2"
+  [(set_attr "type"    "dalu64")
+   (set_attr "length"  "4")])
+
+(define_expand "cras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "cras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "cras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "cras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "cras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "kcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "kcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "kcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "kcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "kcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "ukcras16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "ukcras16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "ukcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "ukcras16_1_be"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "ukcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "crsa16_1"
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "crsa16_1_le"
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "crsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "crsa16_1_be" ;; Big-endian RTL form of crsa16 (plain HImode minus/plus arms).
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (minus:HI ;; First arm: op1[0] - op2[1].
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (plus:HI ;; Second arm: op1[1] + op2[0].
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "crsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "kcrsa16_1" ;; Emit kcrsa16 through the insn pattern written for the active endianness.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Both insns print the same mnemonic; only their RTL templates differ.  */
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_kcrsa16_1_be (operands[0], operands[1], operands[2])
+	     : gen_kcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "kcrsa16_1_le" ;; Little-endian RTL form of kcrsa16 (signed-saturating minus/plus arms).
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI ;; First arm: op1[1] - op2[0].
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI ;; Second arm: op1[0] + op2[1].
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))] ;; Mask 2: element 1 from the first arm, element 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "kcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "kcrsa16_1_be" ;; Big-endian RTL form of kcrsa16 (signed-saturating minus/plus arms).
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (ss_minus:HI ;; First arm: op1[0] - op2[1].
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (ss_plus:HI ;; Second arm: op1[1] + op2[0].
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "kcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "ukcrsa16_1" ;; Emit ukcrsa16 through the insn pattern written for the active endianness.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Both insns print the same mnemonic; only their RTL templates differ.  */
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_ukcrsa16_1_be (operands[0], operands[1], operands[2])
+	     : gen_ukcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "ukcrsa16_1_le" ;; Little-endian RTL form of ukcrsa16 (unsigned-saturating minus/plus arms).
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI ;; First arm: op1[1] - op2[0].
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI ;; Second arm: op1[0] + op2[1].
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))] ;; Mask 2: element 1 from the first arm, element 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "ukcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "ukcrsa16_1_be" ;; Big-endian RTL form of ukcrsa16 (unsigned-saturating minus/plus arms).
+  [(set (match_operand:V2HI 0 "register_operand"         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (us_minus:HI ;; First arm: op1[0] - op2[1].
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand" " r")
+		(parallel [(const_int 0)]))
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (us_plus:HI ;; Second arm: op1[1] + op2[0].
+	      (vec_select:HI
+		(match_dup 1)
+		(parallel [(const_int 1)]))
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "ukcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "rcras16_1" ;; Emit rcras16 through the insn pattern written for the active endianness.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Both insns print the same mnemonic; only their RTL templates differ.  */
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_rcras16_1_be (operands[0], operands[1], operands[2])
+	     : gen_rcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "rcras16_1_le" ;; Little-endian RTL form of rcras16: SImode minus/plus, arithmetic >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(minus:SI ;; First arm: sign_extend (op1[0]) - sign_extend (op2[1]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI ;; Second arm: sign_extend (op2[0]) + sign_extend (op1[1]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "rcras16_1_be" ;; Big-endian RTL form of rcras16: SImode minus/plus, arithmetic >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(minus:SI ;; First arm: sign_extend (op1[1]) - sign_extend (op2[0]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI ;; Second arm: sign_extend (op2[1]) + sign_extend (op1[0]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 2)))] ;; Mask 2: element 1 from the first arm, element 0 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "urcras16_1" ;; Emit urcras16 through the insn pattern written for the active endianness.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Both insns print the same mnemonic; only their RTL templates differ.  */
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_urcras16_1_be (operands[0], operands[1], operands[2])
+	     : gen_urcras16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "urcras16_1_le" ;; Little-endian RTL form of urcras16: SImode minus/plus, logical >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(minus:SI ;; First arm: zero_extend (op1[0]) - zero_extend (op2[1]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI ;; Second arm: zero_extend (op2[0]) + zero_extend (op1[1]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "urcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "urcras16_1_be" ;; Big-endian RTL form of urcras16: SImode minus/plus, logical >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(minus:SI ;; First arm: zero_extend (op1[1]) - zero_extend (op2[0]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI ;; Second arm: zero_extend (op2[1]) + zero_extend (op1[0]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 2)))] ;; Mask 2: element 1 from the first arm, element 0 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "urcras16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "rcrsa16_1" ;; Emit rcrsa16 through the insn pattern written for the active endianness.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Both insns print the same mnemonic; only their RTL templates differ.  */
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_rcrsa16_1_be (operands[0], operands[1], operands[2])
+	     : gen_rcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "rcrsa16_1_le" ;; Little-endian RTL form of rcrsa16: SImode minus/plus, arithmetic >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+	        (minus:SI ;; First arm: sign_extend (op1[1]) - sign_extend (op2[0]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI ;; Second arm: sign_extend (op1[0]) + sign_extend (op2[1]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 2)))] ;; Mask 2: element 1 from the first arm, element 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "rcrsa16_1_be" ;; Big-endian RTL form of rcrsa16: SImode minus/plus, arithmetic >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+	        (minus:SI ;; First arm: sign_extend (op1[0]) - sign_extend (op2[1]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (ashiftrt:SI
+		(plus:SI ;; Second arm: sign_extend (op1[1]) + sign_extend (op2[0]).
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)])))
+		  (sign_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "urcrsa16_1" ;; Emit urcrsa16 through the insn pattern written for the active endianness.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Both insns print the same mnemonic; only their RTL templates differ.  */
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_urcrsa16_1_be (operands[0], operands[1], operands[2])
+	     : gen_urcrsa16_1_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "urcrsa16_1_le" ;; Little-endian RTL form of urcrsa16: SImode minus/plus, logical >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+	        (minus:SI ;; First arm: zero_extend (op1[1]) - zero_extend (op2[0]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI ;; Second arm: zero_extend (op1[0]) + zero_extend (op2[1]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (const_int 2)))] ;; Mask 2: element 1 from the first arm, element 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "urcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_insn "urcrsa16_1_be" ;; Big-endian RTL form of urcrsa16: SImode minus/plus, logical >> 1, truncate to HI.
+  [(set (match_operand:V2HI 0 "register_operand"           "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+	        (minus:SI ;; First arm: zero_extend (op1[0]) - zero_extend (op2[1]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 1 "register_operand" " r")
+		      (parallel [(const_int 0)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand" " r")
+		      (parallel [(const_int 1)]))))
+		(const_int 1))))
+	  (vec_duplicate:V2HI
+	    (truncate:HI
+	      (lshiftrt:SI
+		(plus:SI ;; Second arm: zero_extend (op1[1]) + zero_extend (op2[0]).
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 1)
+		      (parallel [(const_int 1)])))
+		  (zero_extend:SI
+		    (vec_select:HI
+		      (match_dup 2)
+		      (parallel [(const_int 0)]))))
+		(const_int 1))))
+	  (const_int 1)))] ;; Mask 1: element 0 from the first arm, element 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "urcrsa16\t%0, %1, %2"
+  [(set_attr "type" "dalu")]
+)
+
+(define_expand "<shift>v2hi3" ;; V2HI shift expander; <shift>/shifts are presumably code-iterator names defined elsewhere.
+  [(set (match_operand:V2HI 0 "register_operand"                  "")
+	(shifts:V2HI (match_operand:V2HI 1 "register_operand"     "")
+		     (match_operand:SI   2 "nds32_rimm4u_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (operands[2] == const0_rtx) /* A shift by constant zero is just a register copy.  */
+    {
+      emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+}) ;; Any other count falls through and the template above is emitted.
+
+(define_insn "*ashlv2hi3" ;; V2HI left shift: slli16 for an Iu04 immediate count, sll16 for a register count.
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r, r")
+	(ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+		     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   slli16\t%0, %1, %2
+   sll16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "kslli16" ;; V2HI signed-saturating left shift (ss_ashift): kslli16 for an immediate count, ksll16 for a register.
+  [(set (match_operand:V2HI 0 "register_operand"                   "=   r, r")
+	(ss_ashift:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+			(match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli16\t%0, %1, %2
+   ksll16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "*ashrv2hi3" ;; V2HI arithmetic right shift: srai16 for an Iu04 immediate count, sra16 for a register count.
+  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
+	(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+		       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai16\t%0, %1, %2
+   sra16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "sra16_round" ;; Rounding variant (.u mnemonics) of the V2HI arithmetic right shift.
+  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
+	(unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+				     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
+		     UNSPEC_ROUND))] ;; The shift is wrapped in UNSPEC_ROUND, so it is opaque to RTL simplification.
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai16.u\t%0, %1, %2
+   sra16.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround,daluround")
+   (set_attr "length" "         4,       4")])
+
+(define_insn "*lshrv2hi3" ;; V2HI logical right shift: srli16 for an Iu04 immediate count, srl16 for a register count.
+  [(set (match_operand:V2HI 0 "register_operand"                  "=   r, r")
+	(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+		       (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srli16\t%0, %1, %2
+   srl16\t%0, %1, %2"
+  [(set_attr "type"   "dalu,dalu")
+   (set_attr "length" "   4,   4")])
+
+(define_insn "srl16_round" ;; Rounding variant (.u mnemonics) of the V2HI logical right shift.
+  [(set (match_operand:V2HI 0 "register_operand"                                "=   r, r")
+	(unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand"   "    r, r")
+				     (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))]
+		     UNSPEC_ROUND))] ;; The shift is wrapped in UNSPEC_ROUND, so it is opaque to RTL simplification.
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srli16.u\t%0, %1, %2
+   srl16.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround,daluround")
+   (set_attr "length" "        4,        4")])
+
+(define_insn "kslra16" ;; Direction chosen by the sign of register operand 2.
+  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
+	(if_then_else:V2HI
+	  (lt:SI (match_operand:SI 2 "register_operand"           " r")
+		 (const_int 0)) ;; op2 < 0 selects the right-shift arm.
+	  (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")
+			 (neg:SI (match_dup 2))) ;; Arithmetic right shift by -op2.
+	  (ashift:V2HI (match_dup 1)
+		       (match_dup 2))))] ;; Otherwise left shift by op2.
+  "NDS32_EXT_DSP_P ()"
+  "kslra16\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+(define_insn "kslra16_round" ;; As kslra16, but the right-shift arm is wrapped in UNSPEC_ROUND (kslra16.u).
+  [(set (match_operand:V2HI 0 "register_operand"                  "=r")
+	(if_then_else:V2HI
+	  (lt:SI (match_operand:SI 2 "register_operand"           " r")
+		 (const_int 0)) ;; op2 < 0 selects the right-shift arm.
+	  (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r")
+				       (neg:SI (match_dup 2)))]
+		       UNSPEC_ROUND) ;; Rounded arithmetic right shift by -op2.
+	  (ashift:V2HI (match_dup 1)
+		       (match_dup 2))))] ;; Otherwise left shift by op2.
+  "NDS32_EXT_DSP_P ()"
+  "kslra16.u\t%0, %1, %2"
+  [(set_attr "type"    "daluround")
+   (set_attr "length"  "4")])
+
+(define_insn "cmpeq<bits>" ;; eq compare of two VQIHI-mode registers; result goes to an SImode register.
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r")
+			   (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))] ;; Wrapped in an unspec; the exact result encoding is not expressed in RTL.
+  "NDS32_EXT_DSP_P ()"
+  "cmpeq<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "scmplt<bits>" ;; Signed lt compare of two VQIHI-mode registers; result goes to an SImode register.
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r")
+			   (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))] ;; Wrapped in an unspec; the exact result encoding is not expressed in RTL.
+  "NDS32_EXT_DSP_P ()"
+  "scmplt<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "scmple<bits>" ;; Signed le compare of two VQIHI-mode registers; result goes to an SImode register.
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r")
+			   (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))] ;; Wrapped in an unspec; the exact result encoding is not expressed in RTL.
+  "NDS32_EXT_DSP_P ()"
+  "scmple<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "ucmplt<bits>" ;; Unsigned ltu compare of two VQIHI-mode registers; result goes to an SImode register.
+  [(set (match_operand:SI 0 "register_operand"                        "=r")
+	(unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r")
+			    (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))] ;; Wrapped in an unspec; the exact result encoding is not expressed in RTL.
+  "NDS32_EXT_DSP_P ()"
+  "ucmplt<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "ucmple<bits>" ;; Unsigned leu compare of two VQIHI-mode registers; result goes to an SImode register.
+  [(set (match_operand:SI 0 "register_operand"                        "=r")
+	(unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r")
+			    (match_operand:VQIHI 2 "register_operand" " r"))]
+		   UNSPEC_VEC_COMPARE))] ;; Wrapped in an unspec; the exact result encoding is not expressed in RTL.
+  "NDS32_EXT_DSP_P ()"
+  "ucmple<bits>\t%0, %1, %2"
+  [(set_attr "type"    "dcmp")
+   (set_attr "length"  "4")])
+
+(define_insn "sclip16" ;; sclip16 on a V2HI register with an Iu04 immediate, modelled as UNSPEC_CLIPS.
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
+		      (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
+		     UNSPEC_CLIPS))] ;; Semantics live in the unspec; nothing here is visible to RTL optimizers.
+  "NDS32_EXT_DSP_P ()"
+  "sclip16\t%0, %1, %2"
+  [(set_attr "type"    "dclip")
+   (set_attr "length"  "4")])
+
+(define_insn "uclip16" ;; uclip16 on a V2HI register with an Iu04 immediate, modelled as UNSPEC_CLIP.
+  [(set (match_operand:V2HI 0 "register_operand"                "=   r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  "    r")
+		      (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")]
+		     UNSPEC_CLIP))] ;; Semantics live in the unspec; nothing here is visible to RTL optimizers.
+  "NDS32_EXT_DSP_P ()"
+  "uclip16\t%0, %1, %2"
+  [(set_attr "type"    "dclip")
+   (set_attr "length"  "4")])
+
+(define_insn "khm16" ;; khm16 on two V2HI registers, modelled as UNSPEC_KHM and typed as a DSP multiply.
+  [(set (match_operand:V2HI 0 "register_operand"                "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
+		      (match_operand:V2HI 2 "register_operand" "  r")]
+		     UNSPEC_KHM))] ;; Semantics live in the unspec; nothing here is visible to RTL optimizers.
+  "NDS32_EXT_DSP_P ()"
+  "khm16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"  "4")])
+
+(define_insn "khmx16" ;; khmx16 on two V2HI registers, modelled as UNSPEC_KHMX and typed as a DSP multiply.
+  [(set (match_operand:V2HI 0 "register_operand"                "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand"  " r")
+		      (match_operand:V2HI 2 "register_operand" "  r")]
+		     UNSPEC_KHMX))] ;; Semantics live in the unspec; nothing here is visible to RTL optimizers.
+  "NDS32_EXT_DSP_P ()"
+  "khmx16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_setv4qi" ;; Store QImode operand 1 into element number operand 2 of V4QI operand 0.
+  [(match_operand:V4QI 0 "register_operand" "")
+   (match_operand:QI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  HOST_WIDE_INT pos = INTVAL (operands[2]);
+  if (pos < 0 || pos > 3) /* V4QI has elements 0..3; index 4 would give mask 16, which no Iv01..Iv08 alternative accepts.  */
+    gcc_unreachable ();
+  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; /* One-hot vec_merge mask for the chosen element.  */
+  emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1],
+				       operands[0], GEN_INT (elem)));
+  DONE;
+})
+
+(define_expand "insb" ;; Insert the low byte of SImode operand 2 into byte operand 3 of V4QI operand 1, result in operand 0.
+  [(match_operand:V4QI 0 "register_operand" "")
+   (match_operand:V4QI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  const HOST_WIDE_INT byte_pos = INTVAL (operands[3]);
+  if (byte_pos < 0 || byte_pos > 3)
+    gcc_unreachable ();
+
+  /* Narrow the SImode source to the QImode value that gets inserted.  */
+  rtx src = gen_reg_rtx (QImode);
+  convert_move (src, operands[2], false);
+
+  /* Big-endian targets need the mirrored element index.  */
+  const HOST_WIDE_INT lane
+    = TARGET_BIG_ENDIAN ? 3 - byte_pos : byte_pos;
+
+  /* One-hot vec_merge mask selecting the element to overwrite.  */
+  rtx selector = gen_int_mode (1 << lane, SImode);
+  emit_insn (gen_vec_setv4qi_internal (operands[0], src,
+				       operands[1], selector));
+  DONE;
+})
+
+(define_expand "insvsi" ;; Bit-field insert into an SImode register; only 8-bit-wide fields are accepted.
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "")
+			 (match_operand:SI 1 "const_int_operand" "")
+			 (match_operand:SI 2 "nds32_insv_operand" ""))
+	(match_operand:SI 3 "register_operand" ""))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[1]) != 8) /* Operand 1 is the zero_extract width; any other width is rejected.  */
+    FAIL;
+}
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")]) ;; NOTE(review): attributes on an expander do not reach the matched insn -- confirm they are intended.
+
+
+(define_insn "insvsi_internal" ;; insb: write SImode register operand 2 into an 8-bit field of operand 0.
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
+			 (const_int 8)
+			 (match_operand:SI 1 "nds32_insv_operand"  "i"))
+	(match_operand:SI 2                  "register_operand"    "r"))]
+  "NDS32_EXT_DSP_P ()"
+  "insb\t%0, %2, %v1" ;; %v is a target print modifier defined elsewhere; presumably maps the bit position to a byte index -- confirm.
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "insvsiqi_internal" ;; As insvsi_internal, but the source is a zero-extended QImode register.
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand"   "+r")
+			 (const_int 8)
+			 (match_operand:SI 1 "nds32_insv_operand"  "i"))
+	(zero_extend:SI (match_operand:QI 2 "register_operand"    "r")))]
+  "NDS32_EXT_DSP_P ()"
+  "insb\t%0, %2, %v1" ;; %v is a target print modifier defined elsewhere; presumably maps the bit position to a byte index -- confirm.
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+;; Intermediate pattern used to synthesize insvsiqi_internal.
+;; v0 = ((v1 & 0xff) << 8)
+(define_insn_and_split "and0xff_s8"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+			   (const_int 8))
+		(const_int 65280)))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx shifted = gen_reg_rtx (SImode); /* Receives v1 << 8; its low byte is zero after the shift.  */
+  emit_insn (gen_ashlsi3 (shifted, operands[1], gen_int_mode (8, SImode)));
+  emit_insn (gen_andsi3 (operands[0], shifted, gen_int_mode (0xffff, SImode))); /* Keep bits 15:0.  */
+  DONE;
+})
+
+;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) & 0xff0000)
+(define_insn_and_split "insbsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+			(const_int -16711681))
+		(and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+				   (const_int 16))
+			(const_int 16711680))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx acc = gen_reg_rtx (SImode); /* Scratch copy of v1 that the insb updates in place.  */
+  emit_move_insn (acc, operands[1]);
+  emit_insn (gen_insvsi_internal (acc, gen_int_mode (16, SImode), operands[2])); /* Field at bit 16.  */
+  emit_move_insn (operands[0], acc);
+  DONE;
+})
+
+;; v0 = (v1 & 0xff00ffff) | v2
+(define_insn_and_split "ior_and0xff00ffff_reg"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int -16711681))
+		(match_operand:SI 2 "register_operand" "r")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx masked = gen_reg_rtx (SImode); /* Receives v1 with bits 23:16 cleared.  */
+  emit_insn (gen_andsi3 (masked, operands[1], gen_int_mode (0xff00ffff, SImode)));
+  emit_insn (gen_iorsi3 (operands[0], masked, operands[2])); /* Then OR in v2 unchanged.  */
+  DONE;
+})
+
+(define_insn "vec_setv4qi_internal" ;; insb: replace the V4QI element named by one-hot mask operand 3 with QImode operand 1.
+  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI
+	    (match_operand:QI 1 "register_operand"        "    r,    r,    r,    r"))
+	  (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
+	  (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Row 0 is little-endian, row 1 big-endian.  Columns follow the Iv01,
+     Iv02, Iv04, Iv08 alternatives; big-endian mirrors the insb index.  */
+  static const char *const pats[2][4] =
+    {
+      { "insb\t%0, %1, 0",
+	"insb\t%0, %1, 1",
+	"insb\t%0, %1, 2",
+	"insb\t%0, %1, 3" },
+      { "insb\t%0, %1, 3",
+	"insb\t%0, %1, 2",
+	"insb\t%0, %1, 1",
+	"insb\t%0, %1, 0" }
+    };
+
+  const int row = TARGET_BIG_ENDIAN ? 1 : 0;
+  return pats[row][which_alternative];
+}
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_setv4qi_internal_vec" ;; Little-endian only: insert element 0 of V4QI operand 1 into operand 2.
+  [(set (match_operand:V4QI 0 "register_operand"          "=   r,    r,    r,    r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI
+	    (vec_select:QI
+	      (match_operand:V4QI 1 "register_operand"    "    r,    r,    r,    r")
+	      (parallel [(const_int 0)])))
+	  (match_operand:V4QI 2 "register_operand"        "    0,    0,    0,    0")
+	  (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] ;; One alternative per one-hot mask.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   insb\t%0, %1, 0
+   insb\t%0, %1, 1
+   insb\t%0, %1, 2
+   insb\t%0, %1, 3"
+  [(set_attr "type"    "dinsb")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergev4qi_and_cv0_1" ;; Little-endian only: element 0 of operand 1 merged into an all-zero V4QI, emitted as zeb33/zeb.
+  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI
+	    (vec_select:QI
+	      (match_operand:V4QI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))] ;; Mask 1: element 0 from the duplicate, elements 1..3 from the zero vector.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergev4qi_and_cv0_2" ;; Operand-swapped twin of vec_mergev4qi_and_cv0_1: zero vector is the first vec_merge arm.
+  [(set (match_operand:V4QI 0 "register_operand"       "=$l,r")
+	(vec_merge:V4QI
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V4QI
+	    (vec_select:QI
+	      (match_operand:V4QI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_int 14)))] ;; Mask 14: elements 1..3 from the zero vector, element 0 from the duplicate (what zeb computes).
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergeqi_and_cv0_1" ;; Little-endian only: QImode operand 1 merged into an all-zero V4QI, emitted as zeb33/zeb.
+  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V4QI
+	  (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))] ;; Mask 1: element 0 from the duplicate, elements 1..3 from the zero vector.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergeqi_and_cv0_2" ;; Operand-swapped twin of vec_mergeqi_and_cv0_1: zero vector is the first vec_merge arm.
+  [(set (match_operand:V4QI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V4QI
+	  (const_vector:V4QI [
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" "  l,r"))
+	  (const_int 14)))] ;; Mask 14: elements 1..3 from the zero vector, element 0 from the duplicate (what zeb computes).
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeb33\t%0, %1
+   zeb\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_expand "vec_setv2hi" ;; Store HImode operand 1 into element number operand 2 of V2HI operand 0.
+  [(match_operand:V2HI 0 "register_operand" "")
+   (match_operand:HI 1 "register_operand" "")
+   (match_operand:SI 2 "immediate_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  HOST_WIDE_INT pos = INTVAL (operands[2]);
+  if (pos < 0 || pos > 1) /* V2HI has elements 0 and 1; index 2 would give mask 4, which no Iv01/Iv02 alternative accepts.  */
+    gcc_unreachable ();
+  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; /* One-hot vec_merge mask for the chosen element.  */
+  emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1],
+				       operands[0], GEN_INT (elem)));
+  DONE;
+})
+
+(define_insn "vec_setv2hi_internal" ;; Replace the V2HI element named by one-hot mask operand 3 with HImode operand 1, via pkbb16/pktb16.
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 1 "register_operand"    "    r,    r"))
+	  (match_operand:V2HI 2 "register_operand"    "    r,    r")
+	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* Row 0 is little-endian, row 1 big-endian; columns follow the
+     Iv01 and Iv02 alternatives.  */
+  static const char *const pats[2][2] =
+    {
+      { "pktb16\t%0, %2, %1",
+	"pkbb16\t%0, %1, %2" },
+      { "pkbb16\t%0, %1, %2",
+	"pktb16\t%0, %2, %1" }
+    };
+  const int row = TARGET_BIG_ENDIAN ? 1 : 0;
+
+  return pats[row][which_alternative];
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergev2hi_and_cv0_1" ;; Little-endian only: element 0 of operand 1 merged into an all-zero V2HI, emitted as zeh33/zeh.
+  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))] ;; Mask 1: element 0 from the duplicate, element 1 from the zero vector.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergev2hi_and_cv0_2" ;; Operand-swapped twin of vec_mergev2hi_and_cv0_1: zero vector is the first vec_merge arm.
+  [(set (match_operand:V2HI 0 "register_operand"       "=$l,r")
+	(vec_merge:V2HI
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand" "  l,r")
+	      (parallel [(const_int 0)])))
+	  (const_int 2)))] ;; Mask 2: element 1 from the zero vector, element 0 from the duplicate.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergehi_and_cv0_1" ;; Little-endian only: HImode operand 1 merged into an all-zero V2HI, emitted as zeh33/zeh.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (const_int 1)))] ;; Mask 1: element 0 from the duplicate, element 1 from the zero vector.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_mergehi_and_cv0_2" ;; Operand-swapped twin of vec_mergehi_and_cv0_1: zero vector is the first vec_merge arm.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=$l,r")
+	(vec_merge:V2HI
+	  (const_vector:V2HI [
+	    (const_int 0)
+	    (const_int 0)])
+	  (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "  l,r"))
+	  (const_int 2)))] ;; Mask 2: element 1 from the zero vector, element 0 from the duplicate.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   zeh33\t%0, %1
+   zeh\t%0, %1"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_expand "pkbb" ;; Expand pkbb through the vec_mergevv pattern with endian-specific constants.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* vec_mergevv receives (1, 1, 1) on big-endian and (2, 0, 0) on
+     little-endian as its last three operands.  Their meaning is set by
+     that pattern, which is defined elsewhere -- presumably a merge mask
+     followed by two element selectors (TODO confirm).  */
+  const bool big_p = TARGET_BIG_ENDIAN;
+  rtx op3 = GEN_INT (big_p ? 1 : 2);
+  rtx op45 = GEN_INT (big_p ? 1 : 0);
+
+  emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+			      op3, op45, op45));
+  DONE;
+})
+
+(define_insn "pkbbsi_1" ;; SImode form: (op1 & 0xffff) | (op2 << 16), emitted as pkbb16 %0, %2, %1.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int 65535))
+		(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1" ;; Note the operand order: %2 supplies the upper half.
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_2" ;; Same value as pkbbsi_1 with the two ior arms in the other order.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI	(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))
+		(and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int 65535))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_3" ;; %0 = zero_extend (HImode %1) | (%2 << 16).
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (zero_extend:SI	(match_operand:HI 1 "register_operand" "r"))
+		(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkbbsi_4" ;; Same value as pkbbsi_3 with the two ior arms in the other order.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI	(ashift:SI (match_operand:SI 2 "register_operand" "r")
+			   (const_int 16))
+		(zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "pkbb16\t%0, %2, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff)
+(define_insn "pktbsi_1" ;; Here the low half is an HImode register widened with zero_extend.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int -65536))
+		(zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_2" ;; %0 = (%1 & 0xffff0000) | (%2 & 0xffff), both inputs SImode.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+			(const_int -65536))
+		(and:SI (match_operand:SI 2 "register_operand" "r")
+			(const_int 65535))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack") ;; Same type as the other pktb16 patterns in this file.
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_3" ;; Writes op1 into the 16-bit field at position 0 of op0; op0 is read and written.
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+			 (const_int 16 )
+			 (const_int 0))
+	(match_operand:SI 1 "register_operand"                  " r"))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pktbsi_4" ;; As pktbsi_3, but the inserted value is a zero-extended HImode register.
+  [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+			 (const_int 16 )
+			 (const_int 0))
+	(zero_extend:SI (match_operand:HI 1 "register_operand"  " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "pktb16\t%0, %0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "pkttsi" ;; %0 = (%1 & 0xffff0000) | (%2 >> 16), logical shift.
+  [(set (match_operand:SI 0 "register_operand"                      "=r")
+	(ior:SI (and:SI (match_operand:SI 1 "register_operand"      " r")
+			(const_int -65536))
+		(lshiftrt:SI (match_operand:SI 2 "register_operand" " r")
+			     (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+  "pktt16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "pkbt" ;; Emits vec_mergevv; both branches pick the alternative printing pkbt16 %0, %1, %2.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (1), GEN_INT (0))); /* mask, lane of op1, lane of op2 */
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (0), GEN_INT (1))); /* mask, lane of op1, lane of op2 */
+    }
+  DONE;
+})
+
+(define_expand "pktt" ;; Emits vec_mergevv; both branches pick the alternative printing pktt16 %0, %1, %2.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (0), GEN_INT (0))); /* mask, lane of op1, lane of op2 */
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (1), GEN_INT (1))); /* mask, lane of op1, lane of op2 */
+    }
+  DONE;
+})
+
+(define_expand "pktb" ;; Emits vec_mergevv; both branches pick the alternative printing pktb16 %0, %1, %2.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand:V2HI 2 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (1), GEN_INT (0), GEN_INT (1))); /* mask, lane of op1, lane of op2 */
+    }
+  else
+    {
+      emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2],
+				  GEN_INT (2), GEN_INT (1), GEN_INT (0))); /* mask, lane of op1, lane of op2 */
+    }
+  DONE;
+})
+
+(define_insn "vec_mergerr" ;; LE only: build V2HI from two HImode registers with pkbb16.
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 1 "register_operand"    "    r,    r"))
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 2 "register_operand"    "    r,    r"))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ;; Mask 1: op1 in lane 0; mask 2: op1 in lane 1.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pkbb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+
+(define_insn "vec_merge" ;; Lane-wise select between two V2HI registers; always printed as pktb16.
+  [(set (match_operand:V2HI 0 "register_operand"      "=   r,    r")
+	(vec_merge:V2HI
+	  (match_operand:V2HI 1 "register_operand"    "    r,    r")
+	  (match_operand:V2HI 2 "register_operand"    "    r,    r")
+	  (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ;; Alternative 0 is mask 1, alternative 1 is mask 2.
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pktb16\t%0, %1, %2",
+			     "pktb16\t%0, %2, %1" };
+      return pats[which_alternative]; /* Source order is the reverse of the little-endian table.  */
+    }
+  else
+    {
+      const char *pats[] = { "pktb16\t%0, %2, %1",
+			     "pktb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergerv" ;; LE only: merge HImode op1 with lane op4 of V2HI op2; op3 is the vec_merge mask.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 1 "register_operand"                   "    r,    r,    r,    r"))
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+	      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv02, Iv02")))] ;; Alternatives are (mask, lane): (1,0) (1,1) (2,0) (2,1).
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pktb16\t%0, %2, %1
+   pkbb16\t%0, %1, %2
+   pkbt16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergevr" ;; LE only: merge lane op4 of V2HI op1 with HImode op2; op3 is the vec_merge mask.
+  [(set (match_operand:V2HI 0 "register_operand"                      "=   r,    r,    r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+	       (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")])))
+	  (vec_duplicate:V2HI
+	    (match_operand:HI 2 "register_operand"                    "    r,    r,    r,    r"))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand"                 " Iv01, Iv01, Iv02, Iv02")))] ;; Alternatives are (mask, lane): (1,0) (1,1) (2,0) (2,1).
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   pkbb16\t%0, %2, %1
+   pkbt16\t%0, %2, %1
+   pkbb16\t%0, %1, %2
+   pktb16\t%0, %1, %2"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_mergevv" ;; Merge lane op4 of op1 with lane op5 of op2; op3 is the vec_merge mask.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=   r,    r,    r,    r,    r,    r,    r,    r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
+	      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")])))
+	  (vec_duplicate:V2HI
+	    (vec_select:HI
+	      (match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r,    r,    r,    r,    r")
+	      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")])))
+	  (match_operand:SI 3 "nds32_imm_1_2_operand"                " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))] ;; Alternatives 0-3 use mask 1, 4-7 use mask 2.
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "pktt16\t%0, %1, %2", /* Indexed by constraint alternative.  */
+			     "pktb16\t%0, %1, %2",
+			     "pkbb16\t%0, %1, %2",
+			     "pkbt16\t%0, %1, %2",
+			     "pktt16\t%0, %2, %1",
+			     "pkbt16\t%0, %2, %1",
+			     "pkbb16\t%0, %2, %1",
+			     "pktb16\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "pkbb16\t%0, %2, %1", /* Indexed by constraint alternative.  */
+			     "pktb16\t%0, %2, %1",
+			     "pktt16\t%0, %2, %1",
+			     "pkbt16\t%0, %2, %1",
+			     "pkbb16\t%0, %1, %2",
+			     "pkbt16\t%0, %1, %2",
+			     "pktt16\t%0, %1, %2",
+			     "pktb16\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_extractv4qi" ;; LE only: extract lane op2 (0..3) of V4QI op1 into QImode op0.
+  [(set (match_operand:QI 0 "register_operand" "")
+	(vec_select:QI
+	  (match_operand:V4QI 1          "nonimmediate_operand" "")
+	  (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  if (INTVAL (operands[2]) != 0
+      && INTVAL (operands[2]) != 1
+      && INTVAL (operands[2]) != 2
+      && INTVAL (operands[2]) != 3)
+    gcc_unreachable ();
+
+  if (INTVAL (operands[2]) != 0 && MEM_P (operands[1])) /* Test the source; operand 0 is a register.  */
+    FAIL; /* Only vec_extractv4qi0 accepts a memory source.  */
+})
+
+(define_insn "vec_extractv4qi0" ;; LE only: lane 0 of V4QI from a register (zeb33/zeb) or from memory.
+  [(set (match_operand:QI 0 "register_operand"         "=l,r,r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+	  (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeb33\t%0, %1";
+    case 1:
+      return "zeb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load") ;; Per-alternative attributes, as in vec_extractv2hi0.
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "vec_extractv4qi0_ze" ;; LE only: lane 0 of V4QI zero-extended to SImode.
+  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
+	(zero_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeb33\t%0, %1";
+    case 1:
+      return "zeb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load") ;; Per-alternative attributes, as in vec_extractv2hi0.
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "vec_extractv4qi0_se" ;; LE only: lane 0 of V4QI sign-extended to SImode.
+  [(set (match_operand:SI 0 "register_operand"         "=l,r,r")
+	(sign_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seb33\t%0, %1";
+    case 1:
+      return "seb\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 1);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load") ;; Per-alternative attributes, as in vec_extractv2hi0.
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn_and_split "vec_extractv4qi1" ;; LE only: lane 1 of V4QI; always split before reload.
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" " r")
+	  (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_1 (tmp, operands[1])); /* Bring lane 1 to lane 0.  */
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi2" ;; LE only: lane 2 of V4QI; always split before reload.
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" " r")
+	  (parallel [(const_int 2)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_2 (tmp, operands[1])); /* Bring lane 2 to lane 0.  */
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qi3" ;; LE only: lane 3 of V4QI; always split before reload.
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" " r")
+	  (parallel [(const_int 3)])))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode);
+  emit_insn (gen_rotrv4qi_3 (tmp, operands[1])); /* Bring lane 3 to lane 0.  */
+  emit_insn (gen_vec_extractv4qi0 (operands[0], tmp));
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "vec_extractv4qi3_se" ;; LE only: lane 3 sign-extended to SImode, an arithmetic right shift by 24.
+  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
+	(sign_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 24
+   srai\t%0, %1, 24"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv4qi3_ze" ;; LE only: lane 3 zero-extended to SImode, a logical right shift by 24.
+  [(set (match_operand:SI 0 "register_operand"       "=$d,r")
+	(zero_extend:SI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srli45\t%0, 24
+   srli\t%0, %1, 24"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn_and_split "vec_extractv4qihi0" ;; LE only: lane 0 sign-extended to HImode; split before reload.
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi0 (tmp, operands[1])); /* Extract the byte first ...  */
+  emit_insn (gen_extendqihi2 (operands[0], tmp)); /* ... then widen it.  */
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi1" ;; LE only: lane 1 sign-extended to HImode; split before reload.
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi1 (tmp, operands[1])); /* Extract the byte first ...  */
+  emit_insn (gen_extendqihi2 (operands[0], tmp)); /* ... then widen it.  */
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi2" ;; LE only: lane 2 sign-extended to HImode; split before reload.
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 2)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi2 (tmp, operands[1])); /* Extract the byte first ...  */
+  emit_insn (gen_extendqihi2 (operands[0], tmp)); /* ... then widen it.  */
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "vec_extractv4qihi3" ;; LE only: lane 3 sign-extended to HImode; split before reload.
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(sign_extend:HI
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" " r")
+	    (parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_insn (gen_vec_extractv4qi3 (tmp, operands[1])); /* Extract the byte first ...  */
+  emit_insn (gen_extendqihi2 (operands[0], tmp)); /* ... then widen it.  */
+  DONE;
+}
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")])
+
+(define_expand "vec_extractv2hi" ;; Extract lane op2 (0 or 1) of V2HI op1 into HImode op0.
+  [(set (match_operand:HI 0 "register_operand" "")
+	(vec_select:HI
+	  (match_operand:V2HI 1          "nonimmediate_operand" "")
+	  (parallel [(match_operand:SI 2 "const_int_operand" "")])))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (INTVAL (operands[2]) != 0
+      && INTVAL (operands[2]) != 1)
+    gcc_unreachable ();
+
+  if (MEM_P (operands[1]) && INTVAL (operands[2]) != (TARGET_BIG_ENDIAN ? 1 : 0))
+    FAIL; /* Only vec_extractv2hi0 (LE) and vec_extractv2hi1_be take memory.  */
+})
+
+(define_insn "vec_extractv2hi0" ;; LE only: lane 0 of V2HI from a register (seh33/seh) or from memory.
+  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+	  (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 2); /* Presumably a 2-byte sign-extending load; helper not visible here.  */
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load")
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "vec_extractv2hi0_ze" ;; LE only: lane 0 of V2HI zero-extended to SImode.
+  [(set (match_operand:SI 0 "register_operand"         "=$l, r,$  l, *r")
+        (zero_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "nonimmediate_operand" "  l, r, U33,  m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "zeh33\t%0, %1";
+    case 1:
+      return "zeh\t%0, %1";
+    case 2:
+      return nds32_output_16bit_load (operands, 2); /* U33 memory operand, length 2.  */
+    case 3:
+      return nds32_output_32bit_load (operands, 2); /* General memory operand, length 4.  */
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load,load")
+   (set_attr "length" "  2,  4,   2,   4")])
+
+(define_insn "vec_extractv2hi0_se" ;; LE only: lane 0 of V2HI sign-extended to SImode.
+  [(set (match_operand:SI 0 "register_operand"         "=$l, r, r")
+        (sign_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+	    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 2); /* Memory alternative, length 4.  */
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,load")
+   (set_attr "length" "  2,  4,   4")])
+
+(define_insn "vec_extractv2hi0_be" ;; BE only: lane 0 of V2HI is taken with an arithmetic right shift by 16.
+  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "register_operand" "  0,r")
+	  (parallel [(const_int 0)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1" ;; LE only: lane 1 of V2HI is taken with an arithmetic right shift by 16.
+  [(set (match_operand:HI 0 "register_operand"     "=$d,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "register_operand" "  0,r")
+	  (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_se" ;; LE only: lane 1 sign-extended to SImode, an arithmetic right shift by 16.
+  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
+	(sign_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srai45\t%0, 16
+   srai\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_ze" ;; LE only: lane 1 zero-extended to SImode, a logical right shift by 16.
+  [(set (match_operand:SI 0 "register_operand"     "=$d,r")
+	(zero_extend:SI
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "register_operand" "  0,r")
+	    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "@
+   srli45\t%0, 16
+   srli\t%0, %1, 16"
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")])
+
+(define_insn "vec_extractv2hi1_be" ;; BE only: lane 1 of V2HI from a register (seh33/seh) or from memory.
+  [(set (match_operand:HI 0 "register_operand"         "=$l,r,r")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "nonimmediate_operand" "  l,r,m")
+	  (parallel [(const_int 1)])))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "seh33\t%0, %1";
+    case 1:
+      return "seh\t%0, %1";
+    case 2:
+      return nds32_output_32bit_load_s (operands, 2); /* NOTE(review): confirm the load addresses lane 1 on BE.  */
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"    "alu,alu,load")
+   (set_attr "length"  "  2,  4,   4")])
+
+(define_insn "<su>mul16" ;; Lane-wise widening multiply V2HI x V2HI -> V2SI; <su>/extend are iterators defined outside this chunk.
+  [(set (match_operand:V2SI 0 "register_operand"                         "=r")
+	(mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r"))
+		   (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mul16\t%0, %1, %2"
+  [(set_attr "type"   "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mulx16" ;; Crossed widening multiply: lane 0 = op1[0] * op2[1], lane 1 = op1[1] * op2[0].
+  [(set (match_operand:V2SI 0 "register_operand"         "=r")
+	(vec_merge:V2SI
+	  (vec_duplicate:V2SI
+	    (mult:SI
+	      (extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 1 "register_operand" " r")
+		  (parallel [(const_int 0)])))
+	      (extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 1)])))))
+	  (vec_duplicate:V2SI
+	    (mult:SI
+	      (extend:SI
+		(vec_select:HI
+		  (match_dup 1)
+		  (parallel [(const_int 1)])))
+	      (extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 0)])))))
+	  (const_int 1)))] ;; Mask 1: lane 0 from the first product, lane 1 from the second.
+  "NDS32_EXT_DSP_P ()"
+  "<su>mulx16\t%0, %1, %2"
+  [(set_attr "type"    "dmul")
+   (set_attr "length"   "4")])
+
+(define_insn "rotrv2hi_1" ;; LE only: swap the two V2HI lanes, printed as rotri by 16.
+  [(set (match_operand:V2HI 0 "register_operand"    "=r")
+	(vec_select:V2HI
+	   (match_operand:V2HI 1 "register_operand" " r")
+	   (parallel [(const_int 1) (const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv2hi_1_be" ;; BE only counterpart of rotrv2hi_1, printed as rotri by 16.
+  [(set (match_operand:V2HI 0 "register_operand"    "=r")
+	(vec_select:V2HI
+	   (match_operand:V2HI 1 "register_operand" " r")
+	   (parallel [(const_int 0) (const_int 1)])))] ;; NOTE(review): [0 1] is the identity selector; confirm this is intended.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_1" ;; LE only: result lane i = source lane (i + 1) mod 4, printed as rotri by 8.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 8"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_1_be" ;; BE only: selector is rotrv4qi_1's with each index i replaced by 3 - i.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))] ;; NOTE(review): not a cyclic rotation; confirm BE lane numbering.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 8"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_2" ;; LE only: result lane i = source lane (i + 2) mod 4, printed as rotri by 16.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_2_be" ;; BE only: selector is rotrv4qi_2's with each index i replaced by 3 - i.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))] ;; NOTE(review): not a cyclic rotation; confirm BE lane numbering.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 16"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_3" ;; LE only: result lane i = source lane (i + 3) mod 4, printed as rotri by 24.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 24"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "rotrv4qi_3_be" ;; BE only: selector is rotrv4qi_3's with each index i replaced by 3 - i.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))] ;; NOTE(review): not a cyclic rotation; confirm BE lane numbering.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "rotri\t%0, %1, 24"
+  [(set_attr "type"   "alu")
+   (set_attr "length"  "4")])
+
+(define_insn "v4qi_dup_10" ;; LE only: copy byte lanes 0 and 1 into lanes 2 and 3.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "pkbb16\t%0, %1, %1"
+  [(set_attr "type"    "dpack") ;; Mnemonic spelled pkbb16 like every other pack pattern in this file.
+   (set_attr "length"  "4")])
+
+(define_insn "v4qi_dup_32" ;; LE only: copy byte lanes 2 and 3 into lanes 0 and 1.
+  [(set (match_operand:V4QI 0 "register_operand"    "=r")
+	(vec_select:V4QI
+	   (match_operand:V4QI 1 "register_operand" " r")
+	   (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "pktt16\t%0, %1, %1"
+  [(set_attr "type"    "dpack") ;; Mnemonic spelled pktt16 like every other pack pattern in this file.
+   (set_attr "length"  "4")])
+
+(define_expand "vec_unpacks_lo_v4qi" ;; LE only: forwards both operands to the sunpkd810 expander.
+  [(match_operand:V2HI 0 "register_operand" "=r")
+   (match_operand:V4QI 1 "register_operand" " r")]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+{
+  emit_insn (gen_sunpkd810 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "sunpkd810" ;; Emits the sunpkd810_imp pattern that matches the current endianness.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd810_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd810_imp" ;; LE: lane 1 = ext (byte 1), lane 0 = ext (byte 0); <zs>/extend are iterators defined outside this chunk.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))] ;; Mask 2: lane 1 from the first arm, lane 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd810_imp_inv" ;; LE: same result as <zs>unpkd810_imp with the merge arms swapped.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 1)))] ;; Mask 1: lane 0 from the first arm, lane 1 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd810_imp_be" ;; BE: lane 0 = ext (byte lane 2), lane 1 = ext (byte lane 3).
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 2)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))] ;; Mask 1: lane 0 from the first arm, lane 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd810_imp_inv_be" ;; BE: same result as <zs>unpkd810_imp_be with the merge arms swapped.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 2)]))))
+	  (const_int 2)))] ;; Mask 2: lane 1 from the first arm, lane 0 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd810\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd820" ;; Emits the sunpkd820_imp pattern that matches the current endianness.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd820_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd820_imp" ;; LE: lane 1 = ext (byte 2), lane 0 = ext (byte 0).
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 2)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))] ;; Mask 2: lane 1 from the first arm, lane 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd820_imp_inv" ;; LE: same result as <zs>unpkd820_imp with the merge arms swapped.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 2)]))))
+	  (const_int 1)))] ;; Mask 1: lane 0 from the first arm, lane 1 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd820_imp_be" ;; BE: lane 0 = ext (byte lane 1), lane 1 = ext (byte lane 3).
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))] ;; Mask 1: lane 0 from the first arm, lane 1 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd820_imp_inv_be" ;; BE: same result as <zs>unpkd820_imp_be with the merge arms swapped.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))] ;; Mask 2: lane 1 from the first arm, lane 0 from the second.
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd820\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_expand "sunpkd830" ;; Emits the sunpkd830_imp pattern that matches the current endianness.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1]));
+  else
+    emit_insn (gen_sunpkd830_imp (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "<zs>unpkd830_imp" ;; LE: lane 1 = ext (byte 3), lane 0 = ext (byte 0).
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))] ;; Mask 2: lane 1 from the first arm, lane 0 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+(define_insn "<zs>unpkd830_imp_inv" ;; LE: same result as <zs>unpkd830_imp with the merge arms swapped.
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))] ;; Mask 1: lane 0 from the first arm, lane 1 from the second.
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Big-endian <zs>unpkd830: vec_merge mask 1 takes result element 0 from the
+;; first arm (extended byte element 0 of operand 1) and result element 1 from
+;; the second arm (extended byte element 3).
+(define_insn "<zs>unpkd830_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Big-endian <zs>unpkd830 with the vec_merge arms in the other order:
+;; mask 2 takes result element 1 from the first arm (extended byte element 3
+;; of operand 1) and result element 0 from the second arm (extended byte
+;; element 0).
+(define_insn "<zs>unpkd830_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd830\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Expand sunpkd831 (V2HI operand 0 from V4QI operand 1) into the insn
+;; pattern written for the target's endianness.
+(define_expand "sunpkd831"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_sunpkd831_imp_be (operands[0], operands[1])
+	     : gen_sunpkd831_imp (operands[0], operands[1]));
+  DONE;
+})
+
+;; Little-endian <zs>unpkd831: vec_merge mask 2 takes result element 1 from
+;; the first arm (extended byte element 3 of operand 1) and result element 0
+;; from the second arm (extended byte element 1).
+(define_insn "<zs>unpkd831_imp"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 3)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 1)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Little-endian <zs>unpkd831 with the vec_merge arms in the other order:
+;; mask 1 takes result element 0 from the first arm (extended byte element 1
+;; of operand 1) and result element 1 from the second arm (extended byte
+;; element 3).
+(define_insn "<zs>unpkd831_imp_inv"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 3)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Big-endian <zs>unpkd831: vec_merge mask 1 takes result element 0 from the
+;; first arm (extended byte element 0 of operand 1) and result element 1 from
+;; the second arm (extended byte element 2).
+(define_insn "<zs>unpkd831_imp_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 0)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 2)]))))
+	  (const_int 1)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Big-endian <zs>unpkd831 with the vec_merge arms in the other order:
+;; mask 2 takes result element 1 from the first arm (extended byte element 2
+;; of operand 1) and result element 0 from the second arm (extended byte
+;; element 0).
+(define_insn "<zs>unpkd831_imp_inv_be"
+  [(set (match_operand:V2HI 0 "register_operand"                     "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_operand:V4QI 1 "register_operand"             " r")
+		(parallel [(const_int 2)]))))
+	  (vec_duplicate:V2HI
+	    (extend:HI
+	      (vec_select:QI
+		(match_dup 1)
+		(parallel [(const_int 0)]))))
+	  (const_int 2)))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "<zs>unpkd831\t%0, %1"
+  [(set_attr "type"    "dpack")
+   (set_attr "length"  "4")])
+
+;; Expand zunpkd810 (V2HI operand 0 from V4QI operand 1) into the insn
+;; pattern written for the target's endianness.
+(define_expand "zunpkd810"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_zunpkd810_imp_be (operands[0], operands[1])
+	     : gen_zunpkd810_imp (operands[0], operands[1]));
+  DONE;
+})
+
+;; Expand zunpkd820 (V2HI operand 0 from V4QI operand 1) into the insn
+;; pattern written for the target's endianness.
+(define_expand "zunpkd820"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_zunpkd820_imp_be (operands[0], operands[1])
+	     : gen_zunpkd820_imp (operands[0], operands[1]));
+  DONE;
+})
+
+;; Expand zunpkd830 (V2HI operand 0 from V4QI operand 1) into the insn
+;; pattern written for the target's endianness.
+(define_expand "zunpkd830"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_zunpkd830_imp_be (operands[0], operands[1])
+	     : gen_zunpkd830_imp (operands[0], operands[1]));
+  DONE;
+})
+
+;; Expand zunpkd831 (V2HI operand 0 from V4QI operand 1) into the insn
+;; pattern written for the target's endianness.
+(define_expand "zunpkd831"
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_zunpkd831_imp_be (operands[0], operands[1])
+	     : gen_zunpkd831_imp (operands[0], operands[1]));
+  DONE;
+})
+
+;; Expand smbb through mulhisi3v.  Both V2HI sources use the same element
+;; index: 1 on big-endian targets, 0 on little-endian targets.
+(define_expand "smbb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+
+  emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], idx, idx));
+  DONE;
+})
+
+;; Expand smbt through mulhisi3v.  Operand 1 contributes element 1 and
+;; operand 2 element 0 on big-endian targets; the indices are exchanged on
+;; little-endian targets.
+(define_expand "smbt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx1 = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+  rtx idx2 = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2],
+			    idx1, idx2));
+  DONE;
+})
+
+;; Expand smtt through mulhisi3v.  Both V2HI sources use the same element
+;; index: 0 on big-endian targets, 1 on little-endian targets.
+(define_expand "smtt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], idx, idx));
+  DONE;
+})
+
+;; SImode product of one sign-extended halfword from each V2HI source.
+;; Operands 3 and 4 are the element indices selected from operands 1 and 2.
+;; The four alternatives pair the index constraints as (Iv00,Iv00),
+;; (Iv00,Iv01), (Iv01,Iv01) and (Iv01,Iv00); Iv00/Iv01 presumably match the
+;; constants 0 and 1 -- confirm against the constraint definitions.
+;; The output code picks smbb/smbt/smtt per alternative and endianness and
+;; prints the sources as %2, %1 where smbt needs them exchanged.
+(define_insn "mulhisi3v"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r,    r,    r,    r")
+	(mult:SI
+	  (sign_extend:SI
+	     (vec_select:HI
+	       (match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+	       (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
+	  (sign_extend:SI (vec_select:HI
+	       (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
+	       (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")])))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smtt\t%0, %1, %2",
+			     "smbt\t%0, %2, %1",
+			     "smbb\t%0, %1, %2",
+			     "smbt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smbb\t%0, %1, %2",
+			     "smbt\t%0, %1, %2",
+			     "smtt\t%0, %1, %2",
+			     "smbt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Expand kmabb through kma_internal.  Operand 1 is passed as the addend
+;; (kma_internal operand 5); operands 2 and 3 are the V2HI sources, both
+;; using element index 1 on big-endian targets and 0 on little-endian ones.
+(define_expand "kmabb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+
+  emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+			       idx, idx, operands[1]));
+  DONE;
+})
+
+;; Expand kmabt through kma_internal.  Operand 1 is passed as the addend
+;; (kma_internal operand 5).  Operand 2 contributes element 1 and operand 3
+;; element 0 on big-endian targets; the indices are exchanged on
+;; little-endian targets.
+(define_expand "kmabt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx1 = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+  rtx idx2 = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+			       idx1, idx2, operands[1]));
+  DONE;
+})
+
+;; Expand kmatt through kma_internal.  Operand 1 is passed as the addend
+;; (kma_internal operand 5); operands 2 and 3 are the V2HI sources, both
+;; using element index 0 on big-endian targets and 1 on little-endian ones.
+(define_expand "kmatt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_kma_internal (operands[0], operands[2], operands[3],
+			       idx, idx, operands[1]));
+  DONE;
+})
+
+;; Saturating add (ss_plus) of operand 5 and the SImode product of one
+;; sign-extended halfword from each V2HI source.  Operand 5 is tied to the
+;; output register by the "0" constraint.  Operands 3 and 4 are the element
+;; indices selected from operands 1 and 2; Iv00/Iv01 presumably match the
+;; constants 0 and 1 -- confirm against the constraint definitions.
+;; The output code picks kmabb/kmabt/kmatt per alternative and endianness
+;; and prints the sources as %2, %1 where kmabt needs them exchanged.
+(define_insn "kma_internal"
+  [(set (match_operand:SI 0 "register_operand"                          "=    r,    r,    r,    r")
+	(ss_plus:SI
+	  (mult:SI
+	    (sign_extend:SI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand"                "    r,    r,    r,    r")
+	        (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand"  " Iv00, Iv00, Iv01, Iv01")])))
+	    (sign_extend:SI
+	      (vec_select:HI
+	        (match_operand:V2HI 2 "register_operand"                "    r,    r,    r,    r")
+	        (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand"  " Iv00, Iv01, Iv01, Iv00")]))))
+	  (match_operand:SI 5 "register_operand"                        "     0,    0,    0,    0")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmatt\t%0, %1, %2",
+			     "kmabt\t%0, %2, %1",
+			     "kmabb\t%0, %1, %2",
+			     "kmabt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmabb\t%0, %1, %2",
+			     "kmabt\t%0, %1, %2",
+			     "kmatt\t%0, %1, %2",
+			     "kmabt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+;; Expand smds into the RTL form written for the target's endianness
+;; (smds_be or smds_le).
+(define_expand "smds"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_smds_be (operands[0], operands[1], operands[2])
+	     : gen_smds_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Little-endian RTL for smds: operand 0 is set to
+;; op1[1] * op2[1] - op1[0] * op2[0], each halfword sign-extended to SImode.
+;; The preparation block is empty, so the template is emitted unchanged; the
+;; insn that recognizes it is outside this view.
+(define_expand "smds_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+;; Big-endian RTL for smds: operand 0 is set to
+;; op1[0] * op2[0] - op1[1] * op2[1], each halfword sign-extended to SImode.
+;; The preparation block is empty, so the template is emitted unchanged; the
+;; insn that recognizes it is outside this view.
+(define_expand "smds_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+;; Expand smdrs into the RTL form written for the target's endianness
+;; (smdrs_be or smdrs_le).
+(define_expand "smdrs"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_smdrs_be (operands[0], operands[1], operands[2])
+	     : gen_smdrs_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; Little-endian RTL for smdrs: operand 0 is set to
+;; op1[0] * op2[0] - op1[1] * op2[1], each halfword sign-extended to SImode.
+;; The preparation block is empty, so the template is emitted unchanged; the
+;; insn that recognizes it is outside this view.
+(define_expand "smdrs_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+;; Big-endian RTL for smdrs: operand 0 is set to
+;; op1[1] * op2[1] - op1[0] * op2[0], each halfword sign-extended to SImode.
+;; The preparation block is empty, so the template is emitted unchanged; the
+;; insn that recognizes it is outside this view.
+(define_expand "smdrs_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+;; Expand smxdsv into the RTL form written for the target's endianness
+;; (smxdsv_be or smxdsv_le).
+(define_expand "smxdsv"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  emit_insn (TARGET_BIG_ENDIAN
+	     ? gen_smxdsv_be (operands[0], operands[1], operands[2])
+	     : gen_smxdsv_le (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+
+;; Little-endian RTL for smxdsv: operand 0 is set to
+;; op1[1] * op2[0] - op1[0] * op2[1], each halfword sign-extended to SImode.
+;; The preparation block is empty, so the template is emitted unchanged; the
+;; insn that recognizes it is outside this view.
+(define_expand "smxdsv_le"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+;; Big-endian RTL for smxdsv: operand 0 is set to
+;; op1[0] * op2[1] - op1[1] * op2[0], each halfword sign-extended to SImode.
+;; The preparation block is empty, so the template is emitted unchanged; the
+;; insn that recognizes it is outside this view.
+(define_expand "smxdsv_be"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" " r")
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+})
+
+;; smal, form 1: DImode operand 1 plus the sign-extended SImode product of
+;; halfword elements 0 and 1 (in that order) of operand 2; the accumulator is
+;; the first plus operand.
+(define_insn "smal1"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"    " r")
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 0)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 2: DImode operand 1 plus the DImode product of halfword
+;; elements 0 and 1 (in that order) of operand 2, each sign-extended directly
+;; to DImode; the accumulator is the first plus operand.
+(define_insn "smal2"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"  " r")
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 3: DImode operand 1 plus the sign-extended SImode product of
+;; halfword elements 1 and 0 (in that order) of operand 2; the accumulator is
+;; the first plus operand.
+(define_insn "smal3"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"    " r")
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 1)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 4: DImode operand 1 plus the DImode product of halfword
+;; elements 1 and 0 (in that order) of operand 2, each sign-extended directly
+;; to DImode; the accumulator is the first plus operand.
+(define_insn "smal4"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI (match_operand:DI 1 "register_operand"  " r")
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 5: the sign-extended SImode product of halfword elements 0 and
+;; 1 (in that order) of operand 2 plus DImode operand 1; the accumulator is
+;; the second plus operand.
+(define_insn "smal5"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 0)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 1)])))))
+	  (match_operand:DI 1 "register_operand"           " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 6: the DImode product of halfword elements 0 and 1 (in that
+;; order) of operand 2, each sign-extended directly to DImode, plus DImode
+;; operand 1; the accumulator is the second plus operand.
+(define_insn "smal6"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 0)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 1)]))))
+	  (match_operand:DI 1 "register_operand"         " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 7: the sign-extended SImode product of halfword elements 1 and
+;; 0 (in that order) of operand 2 plus DImode operand 1; the accumulator is
+;; the second plus operand.
+(define_insn "smal7"
+  [(set (match_operand:DI 0 "register_operand"             "=r")
+	(plus:DI
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r")
+		  (parallel [(const_int 1)])))
+	      (sign_extend:SI
+		(vec_select:HI
+		  (match_dup 2)
+		  (parallel [(const_int 0)])))))
+	  (match_operand:DI 1 "register_operand"           " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+;; smal, form 8: the DImode product of halfword elements 1 and 0 (in that
+;; order) of operand 2, each sign-extended directly to DImode, plus DImode
+;; operand 1; the accumulator is the second plus operand.
+(define_insn "smal8"
+  [(set (match_operand:DI 0 "register_operand"           "=r")
+	(plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand" " r")
+		(parallel [(const_int 1)])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_dup 2)
+		(parallel [(const_int 0)]))))
+	  (match_operand:DI 1 "register_operand"         " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "smal\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Dummy SImode -> DImode sign-extension pattern, needed for smal.
+;; It is always split ("#"): operand 1 is moved into the low part of
+;; operand 0, and the high part becomes the low part shifted right
+;; arithmetically by 31.
+(define_insn_and_split "extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx dst_lo = nds32_di_low_part_subreg (operands[0]);
+  rtx dst_hi = nds32_di_high_part_subreg (operands[0]);
+
+  /* Fill the low part first; the high part is derived from it.  */
+  emit_move_insn (dst_lo, operands[1]);
+  emit_insn (gen_ashrsi3 (dst_hi, dst_lo, GEN_INT (31)));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; Dummy SImode -> DImode zero-extension pattern, needed for
+;; usmar64/usmsr64.  It is always split ("#"): operand 1 is moved into the
+;; low part of operand 0 and the high part is set to zero.
+(define_insn_and_split "zero_extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx dst_lo = nds32_di_low_part_subreg (operands[0]);
+  rtx dst_hi = nds32_di_high_part_subreg (operands[0]);
+
+  /* Write the low part before clearing the high part.  */
+  emit_move_insn (dst_lo, operands[1]);
+  emit_move_insn (dst_hi, const0_rtx);
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; HImode -> DImode sign extension.  It is always split ("#"): operand 1 is
+;; sign-extended into the low part of operand 0 with extendhisi2, and the
+;; high part becomes the low part shifted right arithmetically by 31.
+(define_insn_and_split "extendhidi2"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "NDS32_EXT_DSP_P ()"
+  "#"
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  rtx dst_lo = nds32_di_low_part_subreg (operands[0]);
+  rtx dst_hi = nds32_di_high_part_subreg (operands[0]);
+
+  /* Fill the low part first; the high part is derived from it.  */
+  emit_insn (gen_extendhisi2 (dst_lo, operands[1]));
+  emit_insn (gen_ashrsi3 (dst_hi, dst_lo, GEN_INT (31)));
+  DONE;
+}
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+;; QImode -> HImode sign extension between registers, emitted as sunpkd820.
+(define_insn "extendqihi2"
+  [(set (match_operand:HI 0 "register_operand"                 "=r")
+	(sign_extend:HI (match_operand:QI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "sunpkd820\t%0, %1"
+  [(set_attr "type"       "dpack")
+   (set_attr "length"     "4")])
+
+;; High 32 bits of the signed 32x32 -> 64-bit product of operands 1 and 2
+;; (DImode product shifted right by 32 and truncated), emitted as smmul.
+(define_insn "smulsi3_highpart"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "smmul\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Same shape as smulsi3_highpart, but the DImode product is wrapped in
+;; UNSPEC_ROUND before the shift by 32; emitted as smmul.u.
+(define_insn "smmul_round"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (unspec:DI [(mult:DI
+		  	  (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+			  (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))]
+		       UNSPEC_ROUND)
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "smmul.u\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Saturating add (ss_plus) of operand 1 and the high 32 bits of the signed
+;; 64-bit product of operands 2 and 3.  Operand 1 is tied to the output
+;; register by the "0" constraint, so only %0, %2 and %3 are printed.
+(define_insn "kmmac"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(ss_plus:SI (match_operand:SI 1 "register_operand"             " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+		(sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmac\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Same shape as kmmac, but the DImode product is wrapped in UNSPEC_ROUND
+;; before the shift by 32; emitted as kmmac.u.  Operand 1 is tied to the
+;; output register by the "0" constraint.
+(define_insn "kmmac_round"
+  [(set (match_operand:SI 0 "register_operand"                                     "=r")
+	(ss_plus:SI (match_operand:SI 1 "register_operand"                         " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (unspec:DI [(mult:DI
+			    (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+			    (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
+			 UNSPEC_ROUND)
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmac.u\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Saturating subtract (ss_minus): operand 1 minus the high 32 bits of the
+;; signed 64-bit product of operands 2 and 3.  Operand 1 is tied to the
+;; output register by the "0" constraint, so only %0, %2 and %3 are printed.
+(define_insn "kmmsb"
+  [(set (match_operand:SI 0 "register_operand"                         "=r")
+	(ss_minus:SI (match_operand:SI 1 "register_operand"            " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+		(sign_extend:DI (match_operand:SI 3 "register_operand" " r")))
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmsb\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Same shape as kmmsb, but the DImode product is wrapped in UNSPEC_ROUND
+;; before the shift by 32; emitted as kmmsb.u.  Operand 1 is tied to the
+;; output register by the "0" constraint.
+(define_insn "kmmsb_round"
+  [(set (match_operand:SI 0 "register_operand"                                     "=r")
+	(ss_minus:SI (match_operand:SI 1 "register_operand"                        " 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (unspec:DI [(mult:DI
+			    (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))
+			    (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))]
+			 UNSPEC_ROUND)
+	      (const_int 32)))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmmsb.u\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; High 32 bits of a saturating DImode multiply (ss_mult) whose inputs are
+;; operands 1 and 2, each sign-extended to DImode and multiplied by 2;
+;; emitted as kwmmul.
+(define_insn "kwmmul"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (ss_mult:DI
+	      (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
+	      (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "kwmmul\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Same shape as kwmmul, but the saturating DImode product is wrapped in
+;; UNSPEC_ROUND before the shift by 32; emitted as kwmmul.u.
+(define_insn "kwmmul_round"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (unspec:DI [
+	      (ss_mult:DI
+		(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2))
+		(mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))]
+	      UNSPEC_ROUND)
+	    (const_int 32))))]
+  "NDS32_EXT_DSP_P ()"
+  "kwmmul.u\t%0, %1, %2"
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Expand smmwb through smulhisi3_highpart_1.  The halfword of operand 2 is
+;; element 1 on big-endian targets and element 0 on little-endian targets.
+(define_expand "smmwb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+
+  emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2],
+				       idx));
+  DONE;
+})
+
+;; Expand smmwt through smulhisi3_highpart_1.  The halfword of operand 2 is
+;; element 0 on big-endian targets and element 1 on little-endian targets.
+(define_expand "smmwt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2],
+				       idx));
+  DONE;
+})
+
+;; SImode operand 1 times one sign-extended halfword of V2HI operand 2,
+;; computed in DImode, shifted right by 16 and truncated to SImode.
+;; Operand 3 is the halfword's element index; Iv00/Iv01 presumably match the
+;; constants 0 and 1 -- confirm against the constraint definitions.
+;; The output code maps the index to smmwb or smmwt for the endianness.
+(define_insn "smulhisi3_highpart_1"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+	      (sign_extend:DI
+	        (vec_select:HI
+		  (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %1, %2",
+			     "smmwb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %1, %2",
+			     "smmwt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Commuted form of smulhisi3_highpart_1: the selected halfword of V2HI
+;; operand 1 is the first multiplicand and SImode operand 2 the second.
+;; Operand 3 is the halfword's element index; Iv00/Iv01 presumably match the
+;; constants 0 and 1 -- confirm against the constraint definitions.
+;; smmwb/smmwt take the 32-bit word as their first source and the halfword
+;; register as their second (as smulhisi3_highpart_1 prints them), so the
+;; sources are printed as %2, %1 here.
+(define_insn "smulhisi3_highpart_2"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI
+	        (vec_select:HI
+		  (match_operand:V2HI 1 "register_operand"               "    r,    r")
+		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand"     "    r,    r")))
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %2, %1",
+			     "smmwb\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %2, %1",
+			     "smmwt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Expand smmwb_round through smmw_round_internal.  The halfword of operand 2
+;; is element 1 on big-endian targets and element 0 on little-endian targets.
+(define_expand "smmwb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+
+  emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2],
+				      idx));
+  DONE;
+})
+
+;; Expand smmwt_round through smmw_round_internal.  The halfword of operand 2
+;; is element 0 on big-endian targets and element 1 on little-endian targets.
+(define_expand "smmwt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2],
+				      idx));
+  DONE;
+})
+
+;; Same shape as smulhisi3_highpart_1, but the DImode product is wrapped in
+;; UNSPEC_ROUND before the shift by 16.  Operand 3 is the element index of
+;; the halfword taken from V2HI operand 2; the output code maps it to
+;; smmwb.u or smmwt.u for the endianness.
+(define_insn "smmw_round_internal"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r,    r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (unspec:DI
+	      [(mult:DI
+		 (sign_extend:DI (match_operand:SI 1 "register_operand"     "    r,    r"))
+		 (sign_extend:DI
+		   (vec_select:HI
+		     (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		     (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))]
+	      UNSPEC_ROUND)
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt.u\t%0, %1, %2",
+			     "smmwb.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb.u\t%0, %1, %2",
+			     "smmwt.u\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmul")
+   (set_attr "length"   "4")])
+
+;; Expand kmmawb through kmmaw_internal.  Operand 1 is passed as the addend
+;; (kmmaw_internal operand 4); the halfword of operand 3 is element 1 on
+;; big-endian targets and element 0 on little-endian targets.
+(define_expand "kmmawb"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+
+  emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], idx,
+				 operands[1]));
+  DONE;
+})
+
+;; Expand kmmawt through kmmaw_internal.  Operand 1 is passed as the addend
+;; (kmmaw_internal operand 4); the halfword of operand 3 is element 0 on
+;; big-endian targets and element 1 on little-endian targets.
+(define_expand "kmmawt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx idx = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], idx,
+				 operands[1]));
+  DONE;
+})
+
+;; Saturating add (ss_plus) of operand 4 and the product of SImode operand 1
+;; with one sign-extended halfword of V2HI operand 2, computed in DImode,
+;; shifted right by 16 and truncated.  Operand 4 is tied to the output
+;; register by the "0" constraint.  Operand 3 is the halfword's element
+;; index; the output code maps it to kmmawb or kmmawt for the endianness.
+(define_insn "kmmaw_internal"
+  [(set (match_operand:SI 0 "register_operand"                               "=   r,    r")
+	(ss_plus:SI
+	  (match_operand:SI 4 "register_operand"                             "    0,    0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 1 "register_operand"       "    r,    r"))
+		  (sign_extend:DI
+		    (vec_select:HI
+		      (match_operand:V2HI 2 "register_operand"               "    r,    r")
+		      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+	      (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmmawt\t%0, %1, %2",
+			     "kmmawb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmmawb\t%0, %1, %2",
+			     "kmmawt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Expander for kmmawb_round.  Emits kmmaw_round_internal with the V2HI
+;; element index set to 1 on big-endian targets and 0 otherwise.  Operand 1
+;; of this expander is handed to kmmaw_round_internal as its operand 4.
+;; The "type" attribute is "dmac", matching kmmawt_round and the
+;; kmmaw_round_internal insn that this expander emits.
+(define_expand "kmmawb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3],
+					 GEN_INT (1), operands[1]));
+  else
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3],
+					 GEN_INT (0), operands[1]));
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Expander for kmmawt_round.  Emits kmmaw_round_internal with the V2HI
+;; element index set to 0 on big-endian targets and 1 otherwise.  Operand 1
+;; of this expander is handed to kmmaw_round_internal as its operand 4.
+(define_expand "kmmawt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx elt_index = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3],
+				       elt_index, operands[1]));
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+
+;; Same shape as kmmaw_internal, except that the DImode product is wrapped
+;; in an UNSPEC_ROUND unspec before the shift by 16.  Alternative 0 selects
+;; element 0 of operand 2, alternative 1 selects element 1; the ".u"
+;; mnemonic printed for each alternative depends on TARGET_BIG_ENDIAN.
+(define_insn "kmmaw_round_internal"
+  [(set (match_operand:SI 0 "register_operand" "=   r,    r")
+	(ss_plus:SI
+	  (match_operand:SI 4 "register_operand" "    0,    0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (unspec:DI
+		[(mult:DI
+		   (sign_extend:DI
+		     (match_operand:SI 1 "register_operand" "    r,    r"))
+		   (sign_extend:DI
+		     (vec_select:HI
+		       (match_operand:V2HI 2 "register_operand" "    r,    r")
+		       (parallel
+			 [(match_operand:SI 3 "nds32_imm_0_1_operand"
+					      " Iv00, Iv01")]))))]
+		UNSPEC_ROUND)
+	      (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* First index: 0 = little-endian, 1 = big-endian.
+     Second index: which_alternative.  */
+  const char *mnemonics[2][2] =
+    {
+      { "kmmawb.u\t%0, %1, %2", "kmmawt.u\t%0, %1, %2" },
+      { "kmmawt.u\t%0, %1, %2", "kmmawb.u\t%0, %1, %2" }
+    };
+
+  return mnemonics[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Expander for smalbb.  Emits smaddhidi with both element selectors equal:
+;; 1 on big-endian targets, 0 otherwise.  Operand 1 (the DImode addend) is
+;; passed as operand 3 of smaddhidi.
+(define_expand "smalbb"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx elt_index = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+
+  emit_insn (gen_smaddhidi (operands[0], operands[2],
+			    operands[3], operands[1],
+			    elt_index, elt_index));
+  DONE;
+})
+
+;; Expander for smalbt.  Emits smaddhidi with selectors (1, 0) on
+;; big-endian targets and (0, 1) otherwise, for operands 2 and 3
+;; respectively.  Operand 1 (the DImode addend) is passed as operand 3 of
+;; smaddhidi.
+(define_expand "smalbt"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx first_index = GEN_INT (TARGET_BIG_ENDIAN ? 1 : 0);
+  rtx second_index = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_smaddhidi (operands[0], operands[2],
+			    operands[3], operands[1],
+			    first_index, second_index));
+  DONE;
+})
+
+;; Expander for smaltt.  Emits smaddhidi with both element selectors equal:
+;; 0 on big-endian targets, 1 otherwise.  Operand 1 (the DImode addend) is
+;; passed as operand 3 of smaddhidi.
+(define_expand "smaltt"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx elt_index = GEN_INT (TARGET_BIG_ENDIAN ? 0 : 1);
+
+  emit_insn (gen_smaddhidi (operands[0], operands[2],
+			    operands[3], operands[1],
+			    elt_index, elt_index));
+  DONE;
+})
+
+;; operand 0 = operand 3 + (sext operand 1[operand 4])
+;;			    * (sext operand 2[operand 5]), in DImode.
+;; The four alternatives cover the selector pairs (0,0), (0,1), (1,1) and
+;; (1,0); the mnemonic and source operand order printed for each pair
+;; depend on TARGET_BIG_ENDIAN.
+(define_insn "smaddhidi"
+  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
+	(plus:DI
+	  (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* First index: 0 = little-endian, 1 = big-endian.
+     Second index: which_alternative.  */
+  const char *mnemonics[2][4] =
+    {
+      { "smalbb\t%0, %1, %2",
+	"smalbt\t%0, %1, %2",
+	"smaltt\t%0, %1, %2",
+	"smalbt\t%0, %2, %1" },
+      { "smaltt\t%0, %1, %2",
+	"smalbt\t%0, %2, %1",
+	"smalbb\t%0, %1, %2",
+	"smalbt\t%0, %1, %2" }
+    };
+
+  return mnemonics[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Same computation and output as smaddhidi, but with the plus operands in
+;; the other order: the product comes first and the DImode addend
+;; (operand 3, tied to operand 0) comes second.
+(define_insn "smaddhidi2"
+  [(set (match_operand:DI 0 "register_operand"                         "=   r,    r,    r,    r")
+	(plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 1 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")])))
+	    (sign_extend:DI
+	      (vec_select:HI
+		(match_operand:V2HI 2 "register_operand"               "    r,    r,    r,    r")
+		(parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))
+	  (match_operand:DI 3 "register_operand"                       "    0,    0,    0,    0")))]
+  "NDS32_EXT_DSP_P ()"
+{
+  /* First index: 0 = little-endian, 1 = big-endian.
+     Second index: which_alternative.  */
+  const char *mnemonics[2][4] =
+    {
+      { "smalbb\t%0, %1, %2",
+	"smalbt\t%0, %1, %2",
+	"smaltt\t%0, %1, %2",
+	"smalbt\t%0, %2, %1" },
+      { "smaltt\t%0, %1, %2",
+	"smalbt\t%0, %2, %1",
+	"smalbb\t%0, %1, %2",
+	"smalbt\t%0, %1, %2" }
+    };
+
+  return mnemonics[TARGET_BIG_ENDIAN ? 1 : 0][which_alternative];
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Expander for smalda1.  Forwards all four operands unchanged to
+;; smalda1_be on big-endian targets and to smalda1_le otherwise.
+(define_expand "smalda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx pat;
+
+  if (TARGET_BIG_ENDIAN)
+    pat = gen_smalda1_be (operands[0], operands[1], operands[2], operands[3]);
+  else
+    pat = gen_smalda1_le (operands[0], operands[1], operands[2], operands[3]);
+  emit_insn (pat);
+  DONE;
+})
+
+;; Expander for smalds1.  Forwards all four operands unchanged to
+;; smalds1_be on big-endian targets and to smalds1_le otherwise.
+(define_expand "smalds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx pat;
+
+  if (TARGET_BIG_ENDIAN)
+    pat = gen_smalds1_be (operands[0], operands[1], operands[2], operands[3]);
+  else
+    pat = gen_smalds1_le (operands[0], operands[1], operands[2], operands[3]);
+  emit_insn (pat);
+  DONE;
+})
+
+;; Little-endian form of smalda (only enabled when !TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (p1 + p0), where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+(define_insn "smalda1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalda\t%0, %2, %3"
+  [(set_attr "type"    "dmac")
+   (set_attr "length"   "4")])
+
+;; Little-endian form of smalds (only enabled when !TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (p1 - p0), where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+(define_insn "smalds1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Big-endian form of smalda (only enabled when TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (p0 + p1), where
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;; Compared with smalda1_le the element indices are swapped.
+(define_insn "smalda1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Big-endian form of smalds (only enabled when TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (p0 - p1), where
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;; Compared with smalds1_le the element indices are swapped.
+(define_insn "smalds1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Expander for smaldrs3.  Forwards all four operands unchanged to
+;; smaldrs3_be on big-endian targets and to smaldrs3_le otherwise.
+(define_expand "smaldrs3"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx pat;
+
+  if (TARGET_BIG_ENDIAN)
+    pat = gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3]);
+  else
+    pat = gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3]);
+  emit_insn (pat);
+  DONE;
+})
+
+;; Little-endian form of smaldrs (only enabled when !TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (p0 - p1), where
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+(define_insn "smaldrs3_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Big-endian form of smaldrs (only enabled when TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (p1 - p0), where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Compared with smaldrs3_le the element indices are swapped.
+(define_insn "smaldrs3_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Expander for smalxda1.  Forwards all four operands unchanged to
+;; smalxda1_be on big-endian targets and to smalxda1_le otherwise.
+(define_expand "smalxda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx pat;
+
+  if (TARGET_BIG_ENDIAN)
+    pat = gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3]);
+  else
+    pat = gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3]);
+  emit_insn (pat);
+  DONE;
+})
+
+;; Expander for smalxds1.  Forwards all four operands unchanged to
+;; smalxds1_be on big-endian targets and to smalxds1_le otherwise.
+(define_expand "smalxds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  rtx pat;
+
+  if (TARGET_BIG_ENDIAN)
+    pat = gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3]);
+  else
+    pat = gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3]);
+  emit_insn (pat);
+  DONE;
+})
+
+;; Little-endian crossed multiply-accumulate, iterated over plus_minus
+;; (only enabled when !TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (x1 <plus_minus> x0), where
+;;   x1 = (sext operand 2[1]) * (sext operand 3[0])
+;;   x0 = (sext operand 2[0]) * (sext operand 3[1])
+;; The mnemonic suffix comes from the <add_sub> attribute.
+(define_insn "smalxd<add_sub>1_le"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus_minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalxd<add_sub>\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+
+;; Big-endian crossed multiply-accumulate, iterated over plus_minus
+;; (only enabled when TARGET_BIG_ENDIAN).
+;; operand 0 = operand 1 + sext:DI (x0 <plus_minus> x1), where
+;;   x0 = (sext operand 2[0]) * (sext operand 3[1])
+;;   x1 = (sext operand 2[1]) * (sext operand 3[0])
+;; Compared with the _le pattern the element indices are swapped.
+(define_insn "smalxd<add_sub>1_be"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"                           " 0")
+	  (sign_extend:DI
+	    (plus_minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalxd<add_sub>\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; operand 0 = (operand 1 - sext:DI p1) - sext:DI p0, where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Operand 1 is tied to operand 0.  The condition has no endianness test.
+(define_insn "smslda1"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(minus:DI
+	  (minus:DI
+	    (match_operand:DI 1 "register_operand"                           " 0")
+	    (sign_extend:DI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))))
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; operand 0 = (operand 1 - sext:DI x1) - sext:DI x0, where
+;;   x1 = (sext operand 2[1]) * (sext operand 3[0])
+;;   x0 = (sext operand 2[0]) * (sext operand 3[1])
+;; Operand 1 is tied to operand 0.  The condition has no endianness test.
+(define_insn "smslxda1"
+  [(set (match_operand:DI 0 "register_operand"                             "=r")
+	(minus:DI
+	  (minus:DI
+	    (match_operand:DI 1 "register_operand"                           " 0")
+	      (sign_extend:DI
+		(mult:SI
+		  (sign_extend:SI (vec_select:HI
+				    (match_operand:V2HI 2 "register_operand" " r")
+				    (parallel [(const_int 1)])))
+		  (sign_extend:SI (vec_select:HI
+				    (match_operand:V2HI 3 "register_operand" " r")
+				    (parallel [(const_int 0)]))))))
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; mada patterns, used to synthesize smalda.
+;; mada1 matches the SImode sum of two 16x16 products whose second product
+;; reuses operands 1 and 2 in the same order.  It is only valid before
+;; reload and is always split into two mulhisi3v insns plus an addsi3.
+(define_insn_and_split "mada1"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  /* One fresh pseudo per partial product.  */
+  rtx first_product = gen_reg_rtx (SImode);
+  rtx second_product = gen_reg_rtx (SImode);
+
+  /* First product uses selectors 3/4, second product selectors 5/6.  */
+  emit_insn (gen_mulhisi3v (first_product, operands[1], operands[2],
+			    operands[3], operands[4]));
+  emit_insn (gen_mulhisi3v (second_product, operands[1], operands[2],
+			    operands[5], operands[6]));
+
+  emit_insn (gen_addsi3 (operands[0], first_product, second_product));
+  DONE;
+})
+
+;; Variant of mada1 in which the second product lists operand 2 before
+;; operand 1.  Selector 5 therefore applies to operand 2 and selector 6 to
+;; operand 1, so the split passes them to mulhisi3v as (6, 5).
+(define_insn_and_split "mada2"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 1)]
+{
+  /* One fresh pseudo per partial product.  */
+  rtx first_product = gen_reg_rtx (SImode);
+  rtx second_product = gen_reg_rtx (SImode);
+
+  emit_insn (gen_mulhisi3v (first_product, operands[1], operands[2],
+			    operands[3], operands[4]));
+  /* Selector 6 belongs to operand 1 and selector 5 to operand 2.  */
+  emit_insn (gen_mulhisi3v (second_product, operands[1], operands[2],
+			    operands[6], operands[5]));
+
+  emit_insn (gen_addsi3 (operands[0], first_product, second_product));
+  DONE;
+})
+
+;; sms patterns, used to synthesize smalds.
+;; sms1 matches the SImode difference of two 16x16 products:
+;;   (operand 1[op3] * operand 2[op4]) - (operand 1[op5] * operand 2[op6]).
+;; The insn is valid before reload, or after reload when
+;; nds32_need_split_sms_p returns false for the four selectors.  Before
+;; reload, selector combinations for which nds32_need_split_sms_p returns
+;; true are split through nds32_split_sms.  The assembly text comes from
+;; nds32_output_sms.
+(define_insn_and_split "sms1"
+  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P ()
+   && (!reload_completed
+       || !nds32_need_split_sms_p (operands[3], operands[4],
+				   operands[5], operands[6]))"
+
+{
+  return nds32_output_sms (operands[3], operands[4],
+			   operands[5], operands[6]);
+}
+  "NDS32_EXT_DSP_P ()
+   && !reload_completed
+   && nds32_need_split_sms_p (operands[3], operands[4],
+			      operands[5], operands[6])"
+  [(const_int 1)]
+{
+  nds32_split_sms (operands[0], operands[1], operands[2],
+		   operands[3], operands[4],
+		   operands[5], operands[6]);
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Variant of sms1 in which the second product lists operand 2 before
+;; operand 1:
+;;   (operand 1[op3] * operand 2[op4]) - (operand 2[op5] * operand 1[op6]).
+;; Selector 5 therefore applies to operand 2 and selector 6 to operand 1,
+;; so every helper call passes them in the order (6, 5).
+(define_insn_and_split "sms2"
+  [(set (match_operand:SI 0 "register_operand"                                       "=   r")
+	(minus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand"               "    r")
+			      (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))]
+  "NDS32_EXT_DSP_P ()
+   && (!reload_completed
+       || !nds32_need_split_sms_p (operands[3], operands[4],
+				   operands[6], operands[5]))"
+{
+  return nds32_output_sms (operands[3], operands[4],
+			   operands[6], operands[5]);
+}
+  "NDS32_EXT_DSP_P ()
+   && !reload_completed
+   && nds32_need_split_sms_p (operands[3], operands[4],
+			      operands[6], operands[5])"
+  [(const_int 1)]
+{
+  nds32_split_sms (operands[0], operands[1], operands[2],
+		   operands[3], operands[4],
+		   operands[6], operands[5]);
+  DONE;
+}
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; operand 0 = p1 ss_plus p0 (saturating SImode add), where
+;;   p1 = (sext operand 1[1]) * (sext operand 2[1])
+;;   p0 = (sext operand 1[0]) * (sext operand 2[0])
+(define_insn "kmda"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(ss_plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(const_int 1)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 0)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmda\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Crossed form of kmda.
+;; operand 0 = x1 ss_plus x0 (saturating SImode add), where
+;;   x1 = (sext operand 1[1]) * (sext operand 2[0])
+;;   x0 = (sext operand 1[0]) * (sext operand 2[1])
+(define_insn "kmxda"
+  [(set (match_operand:SI 0 "register_operand"                          "=r")
+	(ss_plus:SI
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 1 "register_operand" "r")
+			      (parallel [(const_int 1)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_operand:V2HI 2 "register_operand" "r")
+			      (parallel [(const_int 0)]))))
+	  (mult:SI
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 1)
+			      (parallel [(const_int 0)])))
+	    (sign_extend:SI (vec_select:HI
+			      (match_dup 2)
+			      (parallel [(const_int 1)]))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmxda\t%0, %1, %2"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; operand 0 = operand 1 ss_plus (p1 ss_plus p0), where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+(define_insn "kmada"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmada\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Same as kmada but with the two products listed in the opposite order
+;; (the element-0 product first, then the element-1 product).  It emits
+;; the same kmada instruction.
+(define_insn "kmada2"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmada\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Crossed form of kmada.
+;; operand 0 = operand 1 ss_plus (x1 ss_plus x0), where
+;;   x1 = (sext operand 2[1]) * (sext operand 3[0])
+;;   x0 = (sext operand 2[0]) * (sext operand 3[1])
+(define_insn "kmaxda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; operand 0 = operand 1 ss_plus (p1 ss_minus p0), where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+(define_insn "kmads"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmads\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Reversed-subtract form of kmads.
+;; operand 0 = operand 1 ss_plus (p0 ss_minus p1), where
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+(define_insn "kmadrs"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmadrs\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Crossed form of kmads.
+;; operand 0 = operand 1 ss_plus (x1 ss_minus x0), where
+;;   x1 = (sext operand 2[1]) * (sext operand 3[0])
+;;   x0 = (sext operand 2[0]) * (sext operand 3[1])
+(define_insn "kmaxds"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxds\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; operand 0 = operand 1 ss_minus (p1 ss_plus p0), where
+;;   p1 = (sext operand 2[1]) * (sext operand 3[1])
+;;   p0 = (sext operand 2[0]) * (sext operand 3[0])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+;; Both products are subtracted from the accumulator, so the inner
+;; operation must be ss_plus: an inner ss_minus would describe
+;; operand 1 - p1 + p0, i.e. it would add the element-0 product.
+;; NOTE(review): KMSDA semantics taken from the DSP ISA specification;
+;; confirm against the Andes manual.
+(define_insn "kmsda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_minus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; Crossed form of kmsda.
+;; operand 0 = operand 1 ss_minus (x1 ss_plus x0), where
+;;   x1 = (sext operand 2[1]) * (sext operand 3[0])
+;;   x0 = (sext operand 2[0]) * (sext operand 3[1])
+;; Operand 1 is tied to operand 0 by the "0" constraint.
+;; Both crossed products are subtracted from the accumulator, so the inner
+;; operation must be ss_plus: an inner ss_minus would describe
+;; operand 1 - x1 + x0, i.e. it would add the second product.
+;; NOTE(review): KMSXDA semantics taken from the DSP ISA specification;
+;; confirm against the Andes manual.
+(define_insn "kmsxda"
+  [(set (match_operand:SI 0 "register_operand"                           "=r")
+	(ss_minus:SI
+	  (match_operand:SI 1 "register_operand"                         " 0")
+	  (ss_plus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsxda\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; smax[8|16] and umax[8|16].
+;; Whole-vector pattern over the VQIHI modes using the sumax code iterator:
+;; operand 0 = sumax (operand 1, operand 2).  The mnemonic is built from
+;; the <opcode> and <bits> attributes.
+(define_insn "<opcode><mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"               "=r")
+	(sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+		     (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+;; smin[8|16] and umin[8|16].
+;; Whole-vector pattern over the VQIHI modes using the sumin code iterator:
+;; operand 0 = sumin (operand 1, operand 2).  The mnemonic is built from
+;; the <opcode> and <bits> attributes.
+(define_insn "<opcode><mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand"              "=r")
+	(sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+		     (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+;; Scalar min/max of element 0 of two VQIHI vectors, iterated over
+;; sumin_max; the result has the element mode <VELT>.  Only enabled when
+;; !TARGET_BIG_ENDIAN.  It prints the same <opcode><bits> mnemonic as the
+;; whole-vector patterns.
+(define_insn "<opcode><mode>3_bb"
+  [(set (match_operand:<VELT> 0 "register_operand"                    "=r")
+	(sumin_max:<VELT> (vec_select:<VELT>
+			    (match_operand:VQIHI 1 "register_operand" " r")
+			    (parallel [(const_int 0)]))
+			  (vec_select:<VELT>
+			    (match_operand:VQIHI 2 "register_operand" " r")
+			    (parallel [(const_int 0)]))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+;; Scalar min/max of element 1 of two VQIHI vectors, iterated over
+;; sumin_max.  It has no assembly form of its own ("#") and is split
+;; before reload into the whole-vector <opcode><mode>3 pattern, a
+;; rotr<mode>_1, and a move of the <VELT>mode subreg at byte offset 0.
+;; NOTE(review): the insn condition requires !TARGET_BIG_ENDIAN but the
+;; split condition does not repeat it -- confirm this is intended.
+(define_insn_and_split "<opcode><mode>3_tt"
+  [(set (match_operand:<VELT> 0 "register_operand"                    "=r")
+	(sumin_max:<VELT> (vec_select:<VELT>
+			    (match_operand:VQIHI 1 "register_operand" " r")
+			    (parallel [(const_int 1)]))
+			  (vec_select:<VELT>
+			    (match_operand:VQIHI 2 "register_operand" " r")
+			    (parallel [(const_int 1)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx vec_result = gen_reg_rtx (<MODE>mode);
+  rtx elt_result;
+
+  /* Whole-vector operation, then rotate the result in place.  */
+  emit_insn (gen_<opcode><mode>3 (vec_result, operands[1], operands[2]));
+  emit_insn (gen_rotr<mode>_1 (vec_result, vec_result));
+
+  /* Copy out the element-mode piece at byte offset 0.  */
+  elt_result = simplify_gen_subreg (<VELT>mode, vec_result, <MODE>mode, 0);
+  emit_move_insn (operands[0], elt_result);
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode>v4qi3_22" ; scalar min/max of element 2 of two V4QI operands
+  [(set (match_operand:QI 0 "register_operand"                   "=r")
+	(sumin_max:QI (vec_select:QI
+			(match_operand:V4QI 1 "register_operand" " r")
+			(parallel [(const_int 2)]))
+		      (vec_select:QI
+			(match_operand:V4QI 2 "register_operand" " r")
+			(parallel [(const_int 2)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#" ; no direct output; the insn is split before reload
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode); /* Vector temporary.  */
+  emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2])); /* Full-vector op.  */
+  emit_insn (gen_rotrv4qi_2 (tmp, tmp)); /* rotrv4qi_2 is defined elsewhere.  */
+  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));
+  DONE; /* Result is the QImode subreg at byte offset 0 of TMP.  */
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode>v4qi3_33" ; scalar min/max of element 3 of two V4QI operands
+  [(set (match_operand:QI 0 "register_operand"                   "=r")
+	(sumin_max:QI (vec_select:QI
+			(match_operand:V4QI 1 "register_operand" " r")
+			(parallel [(const_int 3)]))
+		      (vec_select:QI
+			(match_operand:V4QI 2 "register_operand" " r")
+			(parallel [(const_int 3)]))))]
+  "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN"
+  "#" ; no direct output; the insn is split before reload
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (V4QImode); /* Vector temporary.  */
+  emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2])); /* Full-vector op.  */
+  emit_insn (gen_rotrv4qi_3 (tmp, tmp)); /* rotrv4qi_3 is defined elsewhere.  */
+  emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0));
+  DONE; /* Result is the QImode subreg at byte offset 0 of TMP.  */
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn_and_split "<opcode>v2hi3_bbtt" ; per-element min/max recombined by vec_merge
+  [(set (match_operand:V2HI 0 "register_operand"                         "=r")
+	(vec_merge:V2HI
+	  (vec_duplicate:V2HI
+	    (sumin_max:HI (vec_select:HI
+			    (match_operand:V2HI 1 "register_operand" " r")
+			    (parallel [(const_int 1)]))
+			  (vec_select:HI
+			    (match_operand:V2HI 2 "register_operand" " r")
+			    (parallel [(const_int 1)]))))
+	  (vec_duplicate:V2HI
+	    (sumin_max:HI (vec_select:HI
+			    (match_dup:V2HI 1)
+			    (parallel [(const_int 0)]))
+			  (vec_select:HI
+			    (match_dup:V2HI 2)
+			    (parallel [(const_int 0)]))))
+	  (const_int 2)))] ; mask 2: element 1 from the first arm, element 0 from the second
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "#" ; no direct output; always split
+  "NDS32_EXT_DSP_P ()"
+  [(const_int 0)]
+{
+  emit_insn (gen_<opcode>v2hi3 (operands[0], operands[1], operands[2])); /* One full-vector insn.  */
+  DONE;
+}
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_expand "abs<mode>2" ; emits the ss_abs set below; the preparation block is empty
+  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
+	(ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv" ; disabled when flag_wrapv is set
+{
+})
+
+(define_insn "kabs<mode>2" ; saturating absolute value (ss_abs) of each vector element
+  [(set (match_operand:VQIHI 0 "register_operand"                "=r")
+	(ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "kabs<bits>\t%0, %1"
+  [(set_attr "type"   "dalu")
+   (set_attr "length" "4")])
+
+(define_insn "<su>mar64_1" ; op0 = op1 + extend (op2) * extend (op3), 64-bit product
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0") ; accumulator tied to operand 0
+	  (mult:DI
+	    (extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mar64_2" ; as <su>mar64_1 with the plus operands in the other order
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (mult:DI
+	    (extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))] ; accumulator tied to operand 0
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mar64_3" ; op0 = op1 + extend (op2 * op3), product formed in SImode
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0") ; accumulator tied to operand 0
+	  (extend:DI
+	    (mult:SI ; NOTE(review): a truncated 32-bit product is extended here, unlike _1/_2 -- confirm
+	      (match_operand:SI 2 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>mar64_4" ; as <su>mar64_3 with the plus operands in the other order
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(plus:DI
+	  (extend:DI
+	  (mult:SI ; NOTE(review): a truncated 32-bit product is extended here, unlike _1/_2 -- confirm
+	      (match_operand:SI 2 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))] ; accumulator tied to operand 0
+  "NDS32_EXT_DSP_P ()"
+  "<su>mar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>msr64" ; op0 = op1 - extend (op2) * extend (op3), 64-bit product
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0") ; accumulator tied to operand 0
+	  (mult:DI
+	    (extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>msr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "<su>msr64_2" ; op0 = op1 - extend (op2 * op3), product formed in SImode
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0") ; accumulator tied to operand 0
+	  (extend:DI
+	    (mult:SI ; NOTE(review): a truncated 32-bit product is extended here, unlike <su>msr64 -- confirm
+	      (match_operand:SI 2 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "<su>msr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+;; kmar64, kmsr64, ukmar64 and ukmsr64: saturating 64-bit multiply-add/subtract.
+(define_insn "kmar64_1" ; op0 = ss_plus (op1, sign_extend (op2) * sign_extend (op3))
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(ss_plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (sign_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (sign_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmar64_2" ; as kmar64_1 with the ss_plus operands in the other order
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(ss_plus:DI
+	  (mult:DI
+	    (sign_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (sign_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "kmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "kmsr64" ; op0 = ss_minus (op1, sign_extend (op2) * sign_extend (op3))
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(ss_minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (sign_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (sign_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmar64_1" ; op0 = us_plus (op1, zero_extend (op2) * zero_extend (op3))
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(us_plus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (zero_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (zero_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmar64_2" ; as ukmar64_1 with the us_plus operands in the other order
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(us_plus:DI
+	  (mult:DI
+	    (zero_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (zero_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))
+	  (match_operand:DI 1 "register_operand"     " 0")))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmar64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "ukmsr64" ; op0 = us_minus (op1, zero_extend (op2) * zero_extend (op3))
+  [(set (match_operand:DI 0 "register_operand"       "=r")
+	(us_minus:DI
+	  (match_operand:DI 1 "register_operand"     " 0")
+	  (mult:DI
+	    (zero_extend:DI
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (zero_extend:DI
+	      (match_operand:SI 3 "register_operand" " r")))))]
+  "NDS32_EXT_DSP_P ()"
+  "ukmsr64\t%0, %2, %3"
+  [(set_attr "type"     "dmac")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick1" ; op0 = (op1 & op3) | (op2 & ~op3)
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 3 "register_operand" " r"))
+	    (and:SI
+	      (match_operand:SI 2 "register_operand" " r")
+	      (not:SI (match_dup 3)))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %1, %2, %3" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick2" ; op0 = (op1 & op2) | (~op2 & op3); op2 is the mask
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (and:SI
+	      (not:SI (match_dup 2))
+	      (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %1, %3, %2" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick3" ; op0 = (op1 & op2) | (op3 & ~op1); op1 is the mask
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (and:SI
+	      (match_operand:SI 3 "register_operand" " r")
+	      (not:SI (match_dup 1)))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %2, %3, %1" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick4" ; op0 = (op1 & op2) | (~op1 & op3); op1 is the mask
+  [(set (match_operand:SI 0 "register_operand"       "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand" " r")
+	      (match_operand:SI 2 "register_operand" " r"))
+	    (and:SI
+	      (not:SI (match_dup 1))
+	      (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %2, %3, %1" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick5" ; op0 = (op1 & ~op2) | (op3 & op2); op2 is the mask
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand"         " r")
+	      (not:SI (match_operand:SI 2 "register_operand" " r")))
+	    (and:SI
+	      (match_operand:SI 3 "register_operand"         " r")
+	      (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick6" ; op0 = (~op1 & op2) | (op3 & op1); op1 is the mask
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (not:SI (match_operand:SI 1 "register_operand" " r"))
+	      (match_operand:SI 2 "register_operand"         " r"))
+	    (and:SI
+	      (match_operand:SI 3 "register_operand" " r")
+	      (match_dup 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick7" ; op0 = (op1 & ~op2) | (op2 & op3); op2 is the mask
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (match_operand:SI 1 "register_operand"         " r")
+	      (not:SI (match_operand:SI 2 "register_operand" " r")))
+	    (and:SI
+	      (match_dup 2)
+	      (match_operand:SI 3 "register_operand"         " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "bpick8" ; op0 = (~op1 & op2) | (op1 & op3); op1 is the mask
+  [(set (match_operand:SI 0 "register_operand"               "=r")
+	  (ior:SI
+	    (and:SI
+	      (not:SI (match_operand:SI 1 "register_operand" " r"))
+	      (match_operand:SI 2 "register_operand"         " r"))
+	    (and:SI
+	      (match_dup 1)
+	      (match_operand:SI 3 "register_operand"         " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1" ; the mask register is the last assembly operand
+  [(set_attr "type"     "dbpick")
+   (set_attr "length"   "4")])
+
+(define_insn "sraiu" ; arithmetic right shift wrapped in UNSPEC_ROUND
+  [(set (match_operand:SI 0 "register_operand"                              "=   r, r")
+	(unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand"     "    r, r")
+				 (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))]
+		    UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai.u\t%0, %1, %2
+   sra.u\t%0, %1, %2"
+  [(set_attr "type"   "daluround") ; alternative 0 is the Iu05 immediate form, 1 the register form
+   (set_attr "length" "4")])
+
+(define_insn "kssl" ; signed saturating left shift (ss_ashift)
+  [(set (match_operand:SI 0 "register_operand"                   "=   r, r")
+	(ss_ashift:SI (match_operand:SI 1 "register_operand"     "    r, r")
+		      (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli\t%0, %1, %2
+   ksll\t%0, %1, %2"
+  [(set_attr "type"   "dalu") ; alternative 0 is the Iu05 immediate form, 1 the register form
+   (set_attr "length" "4")])
+
+(define_insn "kslraw_round" ; shift whose direction depends on the sign of operand 2
+  [(set (match_operand:SI 0 "register_operand"                  "=r")
+	(if_then_else:SI
+	  (lt:SI (match_operand:SI 2 "register_operand"        " r")
+		 (const_int 0))
+	  (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r")
+				   (neg:SI (match_dup 2)))]
+		     UNSPEC_ROUND) ; op2 < 0: UNSPEC_ROUND of op1 >> -op2
+	  (ss_ashift:SI (match_dup 1)
+			(match_dup 2))))] ; otherwise: saturating op1 << op2
+  "NDS32_EXT_DSP_P ()"
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type"    "daluround")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "<shift>di3" ; DImode shift/rotate, always split before reload
+  [(set (match_operand:DI 0 "register_operand" "")
+	(shift_rotate:DI (match_operand:DI 1 "register_operand" "")
+			 (match_operand:SI 2 "nds32_rimm6u_operand" "")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  /* Split into a fresh pseudo when the destination overlaps the source.
+     register_operand also accepts SUBREGs, on which REGNO is not valid,
+     so use reg_overlap_mentioned_p instead of comparing REGNO.  */
+  bool overlap_p = reg_overlap_mentioned_p (operands[0], operands[1]);
+  rtx dst = overlap_p ? gen_reg_rtx (DImode) : operands[0];
+  nds32_split_<code>di3 (dst, operands[1], operands[2]);
+  if (overlap_p)
+    emit_move_insn (operands[0], dst);
+  DONE;
+})
+
+(define_insn "sclip32" ; UNSPEC_CLIPS_OV of a register (op1) and an immediate (op2)
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "sclip32\t%0, %1, %2"
+  [(set_attr "type"   "dclip")
+   (set_attr "length" "4")]
+)
+
+(define_insn "uclip32" ; UNSPEC_CLIP_OV of a register (op1) and an immediate (op2)
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "uclip32\t%0, %1, %2"
+  [(set_attr "type"   "dclip")
+   (set_attr "length" "4")]
+)
+
+(define_insn "bitrev" ; UNSPEC_BITREV of op1 with a register or Iu05 immediate op2
+  [(set (match_operand:SI 0 "register_operand"                 "=r,    r")
+	(unspec:SI [(match_operand:SI 1 "register_operand"     " r,    r")
+		    (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")]
+		   UNSPEC_BITREV))]
+  "" ; NOTE(review): no NDS32_EXT_DSP_P () guard, unlike the neighbouring patterns -- confirm
+  "@
+   bitrev\t%0, %1, %2
+   bitrevi\t%0, %1, %2"
+  [(set_attr "type"   "dalu") ; alternative 0 is the register form, 1 the immediate form
+   (set_attr "length" "4")]
+)
+
+;; wext, wexti: low 32 bits of a DImode value shifted right by operand 2.
+(define_insn "<su>wext" ; both <su> variants print the same wext/wexti mnemonics
+  [(set (match_operand:SI 0 "register_operand"                "=r,   r")
+	(truncate:SI
+	  (shiftrt:DI
+	    (match_operand:DI 1 "register_operand"            " r,   r")
+	    (match_operand:SI 2 "nds32_rimm5u_operand"        " r,Iu05"))))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   wext\t%0, %1, %2
+   wexti\t%0, %1, %2"
+  [(set_attr "type"     "dwext") ; alternative 0 is the register form, 1 the immediate form
+   (set_attr "length"   "4")])
+
+;; 32-bit add/sub instruction: raddw and rsubw ((a +/- b) >> 1 on sign-extended values).
+(define_insn "r<opcode>si3"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (ashiftrt:DI
+	    (plus_minus:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+	    (const_int 1))))] ; the sum/difference is formed in DImode before halving
+  "NDS32_EXT_DSP_P ()"
+  "r<opcode>w\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
+
+;; 32-bit add/sub instruction: uraddw and ursubw ((a +/- b) >> 1 on zero-extended values).
+(define_insn "ur<opcode>si3"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (plus_minus:DI
+	      (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+	      (zero_extend:DI (match_operand:SI 2 "register_operand" " r")))
+	    (const_int 1))))] ; the sum/difference is formed in DImode before halving
+  "NDS32_EXT_DSP_P ()"
+  "ur<opcode>w\t%0, %1, %2"
+  [(set_attr "type"    "dalu")
+   (set_attr "length"  "4")])
diff --git a/gcc/config/nds32/nds32-elf.opt b/gcc/config/nds32/nds32-elf.opt
new file mode 100644
index 00000000000..afe6aadd089
--- /dev/null
+++ b/gcc/config/nds32/nds32-elf.opt
@@ -0,0 +1,16 @@
+mcmodel=
+Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM)
+Specify the address generation strategy for code model.
+; The Enum and EnumValue records below map small/medium/large to CMODEL_* values.
+Enum
+Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
+Known cmodel types (for use with the -mcmodel= option):
+
+EnumValue
+Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
+
+EnumValue
+Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
+
+EnumValue
+Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c
index 95c9586c3b6..26d2865d450 100644
--- a/gcc/config/nds32/nds32-fp-as-gp.c
+++ b/gcc/config/nds32/nds32-fp-as-gp.c
@@ -26,19 +26,256 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "hard-reg-set.h"
+#include "tm_p.h"
+#include "rtl.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "insn-config.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "ira.h"
+#include "ira-int.h"
+#include "df.h"
+#include "tree-core.h"
+#include "tree-pass.h"
+#include "nds32-protos.h"
 
 /* ------------------------------------------------------------------------ */
 
+/* Return true if the current function needs a prologue: some register in
+   0..27, $fp or $lp must be saved, or PIC code is being generated.  */
+static bool
+nds32_have_prologue_p (void)
+{
+  for (int regno = 0; regno < 28; regno++)
+    if (NDS32_REQUIRED_CALLEE_SAVED_P (regno))
+      return true;
+
+  if (flag_pic || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM))
+    return true;
+
+  return NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM);
+}
+
+/* Count the single-set insns of the current function that
+   nds32_symbol_load_store_p accepts and whose memory side is a bare MEM.  */
+static int
+nds32_get_symbol_count (void)
+{
+  int count = 0;
+  basic_block bb;
+  rtx_insn *insn;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    FOR_BB_INSNS (bb, insn)
+      {
+	if (!single_set (insn) || !nds32_symbol_load_store_p (insn))
+	  continue;
+
+	rtx pattern = PATTERN (insn);
+	gcc_assert (GET_CODE (pattern) == SET);
+
+	/* A register source means the destination is the memory side;
+	   otherwise the source is.  */
+	rtx mem = (GET_CODE (SET_SRC (pattern)) == REG
+		   ? SET_DEST (pattern) : SET_SRC (pattern));
+
+	/* fp-as-gp only has lwi37 and swi37.  QImode and HImode loads are
+	   wrapped in ZERO_EXTEND or SIGN_EXTEND, so they are not bare MEMs
+	   and are not counted.
+	   NOTE(review): a narrow store still has a bare MEM destination and
+	   is counted, since no mode check is done here -- confirm intent.  */
+	if (GET_CODE (mem) == MEM)
+	  count++;
+      }
+
+  return count;
+}
+
 /* Function to determine whether it is worth to do fp_as_gp optimization.
-   Return 0: It is NOT worth to do fp_as_gp optimization.
-   Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization.
+   Return false: It is NOT worth to do fp_as_gp optimization.
+   Return true: It is APPROXIMATELY worth to do fp_as_gp optimization.
    Note that if it is worth to do fp_as_gp optimization,
    we MUST set FP_REGNUM ever live in this function.  */
-int
+static bool
 nds32_fp_as_gp_check_available (void)
 {
-  /* By default we return 0.  */
-  return 0;
+  basic_block bb;
+  basic_block exit_bb;
+  edge_iterator ei;
+  edge e;
+  bool first_exit_blocks_p;
+
+  /* If ANY of the following conditions holds,
+     we DO NOT perform fp_as_gp optimization:
+       1. TARGET_FORBID_FP_AS_GP is set
+	  regardless of the TARGET_FORCE_FP_AS_GP.
+       2. User explicitly uses 'naked'/'no_prologue' attribute.
+	  We use nds32_naked_function_p() to help such checking.
+       3. Not optimizing for size.
+       4. Need frame pointer.
+       5. If $fp is already required to be saved,
+	  it means $fp is already chosen by register allocator.
+	  Thus we better not to use it for fp_as_gp optimization.
+       6. This function is a vararg function.
+	  DO NOT apply fp_as_gp optimization on this function
+	  because it may change and break stack frame.
+       7. The epilogue is empty.
+	  This happens when the function uses exit()
+	  or its attribute is no_return.
+	  In that case, compiler will not expand epilogue
+	  so that we have no chance to output .omit_fp_end directive.  */
+  if (TARGET_FORBID_FP_AS_GP
+      || nds32_naked_function_p (current_function_decl)
+      || !optimize_size
+      || frame_pointer_needed
+      || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
+      || (cfun->stdarg == 1)
+      || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL))
+    return false;
+
+  /* Disable fp_as_gp if there is any infinite loop, since $fp may be
+     reused inside infinite loops by register renaming.
+     To detect infinite loops we check that exit_bb post-dominates
+     all other basic blocks, which holds when there is no infinite loop.  */
+  first_exit_blocks_p = true;
+  exit_bb = NULL;
+
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+    {
+      /* With more than one exit block we do not perform fp_as_gp either.  */
+      if (!first_exit_blocks_p)
+	return false;
+
+      exit_bb = e->src;
+      first_exit_blocks_p = false;
+    }
+
+  /* No exit_bb found?  Just give up on fp_as_gp.  */
+  if (!exit_bb)
+    return false;
+
+  /* Every bb is post-dominated by exit_bb when there is no infinite loop.  */
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      if (!dominated_by_p (CDI_POST_DOMINATORS,
+			   bb,
+			   exit_bb))
+	return false;
+    }
+
+  /* Now we can check the possibility of using fp_as_gp optimization.  */
+  if (TARGET_FORCE_FP_AS_GP)
+    {
+      /* User explicitly issues -mforce-fp-as-gp option.  */
+      return true;
+    }
+  else
+    {
+      /* In the following we are going to evaluate whether
+	 it is worth to do fp_as_gp optimization.  */
+      bool good_gain = false;
+      int symbol_count;
+
+      int threshold;
+
+      /* We check whether a prologue is already required.
+	 Note that $gp will be saved in prologue for PIC code generation.
+	 After that, we can set threshold by the existence of prologue.
+	 Each fp-implied instruction will gain 2-byte code size
+	 from gp-aware instruction, so we have following heuristics.  */
+      if (flag_pic
+	  || nds32_have_prologue_p ())
+	{
+	  /* Have-prologue:
+	       Compiler already intends to generate prologue content,
+	       so the fp_as_gp optimization will only insert
+	       'la $fp,_FP_BASE_' instruction, which will be
+	       converted into 4-byte instruction at link time.
+	       The threshold is "3" symbol accesses, 2 + 2 + 2 > 4.  */
+	  threshold = 3;
+	}
+      else
+	{
+	  /* No-prologue:
+	       Compiler originally does not generate prologue content,
+	       so the fp_as_gp optimization will NOT ONLY insert
+	       'la $fp,_FP_BASE_' instruction, but also causes
+	       push/pop instructions.
+	       If we are using v3push (push25/pop25),
+	       the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2;
+	       If we are using normal push (smw/lmw),
+	       the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4.  */
+	  threshold = 5 + (TARGET_V3PUSH ? 0 : 2);
+	}
+
+      symbol_count = nds32_get_symbol_count ();
+
+      if (symbol_count >= threshold)
+	good_gain = true;
+
+      /* Enable fp_as_gp optimization when potential gain is good enough.  */
+      return good_gain;
+    }
+}
+
+/* Pass entry point: decide whether the fp_as_gp optimization applies to the
+   current function and record the decision in cfun->machine.  Returns 1.  */
+static unsigned int
+nds32_fp_as_gp (void)
+{
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  const bool use_fp_as_gp = nds32_fp_as_gp_check_available ();
+  free_dominance_info (CDI_POST_DOMINATORS);
+
+  /* Hack: switch FP_REGNUM in or out of IRA's no_unit_alloc_regs set for
+     this function.  Flagged by the author for review after GCC 4.9.  */
+  if (!use_fp_as_gp)
+    CLEAR_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
+  else
+    {
+      SET_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
+      df_set_regs_ever_live (FP_REGNUM, 1);
+    }
+  cfun->machine->fp_as_gp_p = use_fp_as_gp;
+  return 1;
+}
+
+const pass_data pass_data_nds32_fp_as_gp = /* Descriptor for the fp_as_gp RTL pass.  */
+{
+  RTL_PASS,				/* type */
+  "fp_as_gp",				/* name */
+  OPTGROUP_NONE,			/* optinfo_flags */
+  TV_MACH_DEP,				/* tv_id */
+  0,					/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0					/* todo_flags_finish */
+};
+
+class pass_nds32_fp_as_gp : public rtl_opt_pass /* Pass object wrapping nds32_fp_as_gp.  */
+{
+public:
+  pass_nds32_fp_as_gp (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt)
+  {}
+
+  /* opt_pass methods: gate needs !TARGET_LINUX_ABI, TARGET_16_BIT and optimize_size.  */
+  bool gate (function *)
+  {
+    return !TARGET_LINUX_ABI
+	   && TARGET_16_BIT
+	   && optimize_size;
+  }
+  unsigned int execute (function *) { return nds32_fp_as_gp (); } /* Function argument is unused.  */
+};
+
+rtl_opt_pass * /* Factory for the fp_as_gp pass.  */
+make_pass_nds32_fp_as_gp (gcc::context *ctxt)
+{
+  return new pass_nds32_fp_as_gp (ctxt); /* Allocated with new; not freed here.  */
 }
 
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-fpu.md b/gcc/config/nds32/nds32-fpu.md
index 719b0428ced..9b844021a24 100644
--- a/gcc/config/nds32/nds32-fpu.md
+++ b/gcc/config/nds32/nds32-fpu.md
@@ -1,5 +1,5 @@
 ;; Machine description of Andes NDS32 cpu for GNU compiler
-;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
 ;; Contributed by Andes Technology Corporation.
 ;;
 ;; This file is part of GCC.
diff --git a/gcc/config/nds32/nds32-graywolf.md b/gcc/config/nds32/nds32-graywolf.md
new file mode 100644
index 00000000000..f0c98a6f75d
--- /dev/null
+++ b/gcc/config/nds32/nds32-graywolf.md
@@ -0,0 +1,471 @@
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; ------------------------------------------------------------------------
+;; Define Graywolf pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_graywolf_machine")
+
+(define_cpu_unit "gw_ii_0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ii_1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ex_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_mm_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_wb_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ex_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_mm_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_wb_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_iq_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_rf_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e1_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e2_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e3_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e4_p2" "nds32_graywolf_machine")
+
+(define_reservation "gw_ii" "gw_ii_0 | gw_ii_1")
+(define_reservation "gw_ex" "gw_ex_p0 | gw_ex_p1")
+(define_reservation "gw_mm" "gw_mm_p0 | gw_mm_p1")
+(define_reservation "gw_wb" "gw_wb_p0 | gw_wb_p1")
+
+(define_reservation "gw_ii_all" "gw_ii_0 + gw_ii_1")
+
+(define_insn_reservation "nds_gw_unknown" 1
+  (and (eq_attr "type" "unknown")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_misc" 1
+  (and (eq_attr "type" "misc")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_mmu" 1
+  (and (eq_attr "type" "mmu")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_alu" 1
+  (and (and (eq_attr "type" "alu")
+            (match_test "!nds32::movd44_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_movd44" 1
+  (and (and (eq_attr "type" "alu")
+            (match_test "nds32::movd44_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_alu_shift" 1
+  (and (eq_attr "type" "alu_shift")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex*2, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_pbsad" 1
+  (and (eq_attr "type" "pbsad")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex*3, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_pbsada" 1
+  (and (eq_attr "type" "pbsada")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex*3, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_load" 1
+  (and (and (eq_attr "type" "load")
+            (match_test "!nds32::post_update_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_2w" 1
+  (and (and (eq_attr "type" "load")
+            (match_test "nds32::post_update_insn_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store" 1
+  (and (and (eq_attr "type" "store")
+            (match_test "!nds32::store_offset_reg_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_3r" 1
+  (and (and (eq_attr "type" "store")
+            (match_test "nds32::store_offset_reg_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_1" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_2" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "2"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_3" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_4" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_5" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_6" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_7" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_8" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_load_multiple_12" 1
+  (and (and (eq_attr "type" "load_multiple")
+            (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_1" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_2" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "2"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_3" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_4" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_5" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_6" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_7" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_8" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_store_multiple_12" 1
+  (and (and (eq_attr "type" "store_multiple")
+            (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1")
+
+(define_insn_reservation "nds_gw_mul_fast1" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1")
+       (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mul_fast2" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2")
+       (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_0, gw_ex_p0*2, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mul_slow" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mac_fast1" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1")
+       (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mac_fast2" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2")
+       (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_all, gw_ex_p0*2, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_mac_slow" 1
+  (and (match_test "nds32_mul_config == MUL_TYPE_SLOW")
+       (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "graywolf")))
+  "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_div" 1
+  (and (and (eq_attr "type" "div")
+            (match_test "!nds32::divmod_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_div_2w" 1
+  (and (and (eq_attr "type" "div")
+            (match_test "nds32::divmod_p (insn)"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_branch" 1
+  (and (eq_attr "type" "branch")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_alu" 1
+  (and (eq_attr "type" "dalu")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ex, gw_mm, gw_wb")
+
+(define_insn_reservation "nds_gw_dsp_alu64" 1
+  (and (eq_attr "type" "dalu64")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_alu_round" 1
+  (and (eq_attr "type" "daluround")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_cmp" 1
+  (and (eq_attr "type" "dcmp")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_clip" 1
+  (and (eq_attr "type" "dclip")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_mul" 1
+  (and (eq_attr "type" "dmul")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_mac" 1
+  (and (eq_attr "type" "dmac")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_insb" 1
+  (and (eq_attr "type" "dinsb")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_pack" 1
+  (and (eq_attr "type" "dpack")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_bpick" 1
+  (and (eq_attr "type" "dbpick")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_dsp_wext" 1
+  (and (eq_attr "type" "dwext")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0")
+
+(define_insn_reservation "nds_gw_fpu_alu" 4
+  (and (eq_attr "type" "falu")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_muls" 4
+  (and (eq_attr "type" "fmuls")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_muld" 4
+  (and (eq_attr "type" "fmuld")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_macs" 4
+  (and (eq_attr "type" "fmacs")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*3, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_macd" 4
+  (and (eq_attr "type" "fmacd")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*4, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_divs" 4
+  (and (ior (eq_attr "type" "fdivs")
+	    (eq_attr "type" "fsqrts"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*14, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_divd" 4
+  (and (ior (eq_attr "type" "fdivd")
+	    (eq_attr "type" "fsqrtd"))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*28, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fast_alu" 2
+  (and (ior (eq_attr "type" "fcmp")
+	    (ior (eq_attr "type" "fabs")
+		 (ior (eq_attr "type" "fcpy")
+		      (eq_attr "type" "fcmov"))))
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmtsr" 1
+  (and (eq_attr "type" "fmtsr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmtdr" 1
+  (and (eq_attr "type" "fmtdr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmfsr" 1
+  (and (eq_attr "type" "fmfsr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_fmfdr" 1
+  (and (eq_attr "type" "fmfdr")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_load" 3
+  (and (eq_attr "type" "fload")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+(define_insn_reservation "nds_gw_fpu_store" 1
+  (and (eq_attr "type" "fstore")
+       (eq_attr "pipeline_model" "graywolf"))
+  "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2")
+
+;; FPU_ADDR_OUT -> FPU_ADDR_IN
+;; Main pipeline rules don't need this because their default latency is 1.
+(define_bypass 1
+  "nds_gw_fpu_load, nds_gw_fpu_store"
+  "nds_gw_fpu_load, nds_gw_fpu_store"
+  "nds32_gw_ex_to_ex_p"
+)
+
+;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT
+;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
+;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+  "nds_gw_load, nds_gw_load_2w,\
+   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
+   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
+   nds_gw_div, nds_gw_div_2w,\
+   nds_gw_dsp_alu64, nds_gw_dsp_mul, nds_gw_dsp_mac,\
+   nds_gw_dsp_alu_round, nds_gw_dsp_bpick, nds_gw_dsp_wext"
+  "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\
+   nds_gw_pbsad, nds_gw_pbsada,\
+   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
+   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
+   nds_gw_branch,\
+   nds_gw_div, nds_gw_div_2w,\
+   nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\
+   nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
+   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
+   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\
+   nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\
+   nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\
+   nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\
+   nds_gw_mmu,\
+   nds_gw_dsp_alu, nds_gw_dsp_alu_round,\
+   nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\
+   nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\
+   nds_gw_dsp_wext, nds_gw_dsp_bpick"
+  "nds32_gw_mm_to_ex_p"
+)
+
+;; LMW(N, N)
+;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
+;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+  "nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
+   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
+   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12"
+  "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\
+   nds_gw_pbsad, nds_gw_pbsada,\
+   nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\
+   nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\
+   nds_gw_branch,\
+   nds_gw_div, nds_gw_div_2w,\
+   nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\
+   nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\
+   nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\
+   nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\
+   nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\
+   nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\
+   nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\
+   nds_gw_mmu,\
+   nds_gw_dsp_alu, nds_gw_dsp_alu_round,\
+   nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\
+   nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\
+   nds_gw_dsp_wext, nds_gw_dsp_bpick"
+  "nds32_gw_last_load_to_ex_p"
+)
diff --git a/gcc/config/nds32/nds32-intrinsic.c b/gcc/config/nds32/nds32-intrinsic.c
index b9bb2d995f7..c2ad927b05d 100644
--- a/gcc/config/nds32/nds32-intrinsic.c
+++ b/gcc/config/nds32/nds32-intrinsic.c
@@ -519,6 +519,7 @@ static struct builtin_description bdesc_noarg[] =
 {
   NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG)
   NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR)
+  NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV)
   NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP)
   NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS)
   NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int",
@@ -558,6 +559,31 @@ static struct builtin_description bdesc_1arg[] =
   NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF)
   NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp,
 			  "set_current_sp", SET_CURRENT_SP)
+  NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16)
+  NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16)
+  NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8)
+  NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8)
+  NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810)
+  NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810)
+  NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820)
+  NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820)
+  NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830)
+  NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830)
+  NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831)
+  NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831)
+  NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810)
+  NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810)
+  NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820)
+  NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820)
+  NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830)
+  NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830)
+  NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831)
+  NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831)
+  NDS32_BUILTIN(unspec_kabs, "kabs", KABS)
+  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16)
+  NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16)
+  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8)
+  NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8)
 };
 
 /* Intrinsics that take just one argument. and the argument is immediate.  */
@@ -593,6 +619,28 @@ static struct builtin_description bdesc_2arg[] =
   NDS32_BUILTIN(unspec_ffb, "ffb", FFB)
   NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM)
   NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM)
+  NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW)
+  NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH)
+  NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW)
+  NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH)
+  NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB)
+  NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB)
+  NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT)
+  NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT)
+  NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB)
+  NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB)
+  NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT)
+  NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT)
+  NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB)
+  NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB)
+  NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT)
+  NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT)
+  NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB)
+  NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB)
+  NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT)
+  NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT)
+  NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW)
+  NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U)
   NDS32_BUILTIN(rotrsi3, "rotr", ROTR)
   NDS32_BUILTIN(unspec_sva, "sva", SVA)
   NDS32_BUILTIN(unspec_svs, "svs", SVS)
@@ -603,7 +651,202 @@ static struct builtin_description bdesc_2arg[] =
   NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW)
   NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W)
   NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW)
-
+  NDS32_BUILTIN(addv2hi3, "add16", ADD16)
+  NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16)
+  NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16)
+  NDS32_BUILTIN(raddv2hi3, "radd16", RADD16)
+  NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16)
+  NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16)
+  NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16)
+  NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16)
+  NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16)
+  NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16)
+  NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16)
+  NDS32_BUILTIN(subv2hi3, "sub16", SUB16)
+  NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16)
+  NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16)
+  NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16)
+  NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16)
+  NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16)
+  NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16)
+  NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16)
+  NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16)
+  NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16)
+  NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16)
+  NDS32_BUILTIN(cras16_1, "cras16", CRAS16)
+  NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16)
+  NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16)
+  NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16)
+  NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16)
+  NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16)
+  NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16)
+  NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16)
+  NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16)
+  NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16)
+  NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16)
+  NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16)
+  NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16)
+  NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16)
+  NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16)
+  NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16)
+  NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16)
+  NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16)
+  NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16)
+  NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16)
+  NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16)
+  NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16)
+  NDS32_BUILTIN(addv4qi3, "add8", ADD8)
+  NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8)
+  NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8)
+  NDS32_BUILTIN(raddv4qi3, "radd8", RADD8)
+  NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8)
+  NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8)
+  NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8)
+  NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8)
+  NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8)
+  NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8)
+  NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8)
+  NDS32_BUILTIN(subv4qi3, "sub8", SUB8)
+  NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8)
+  NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8)
+  NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8)
+  NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8)
+  NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8)
+  NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8)
+  NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8)
+  NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8)
+  NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8)
+  NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8)
+  NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16)
+  NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16)
+  NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U)
+  NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U)
+  NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16)
+  NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16)
+  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
+  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
+  NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16)
+  NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16)
+  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
+  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
+  NDS32_BUILTIN(kslra16, "kslra16", KSLRA16)
+  NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16)
+  NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U)
+  NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U)
+  NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16)
+  NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16)
+  NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16)
+  NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16)
+  NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16)
+  NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16)
+  NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16)
+  NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16)
+  NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16)
+  NDS32_BUILTIN(ucmple16, "ucmple16", UCMPLE16) /* <= insn, cf. scmple16.  */
+  NDS32_BUILTIN(ucmple16, "v_ucmple16", V_UCMPLE16)
+  NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8)
+  NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8)
+  NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8)
+  NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8)
+  NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8)
+  NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8)
+  NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8)
+  NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8)
+  NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8)
+  NDS32_BUILTIN(ucmple8, "ucmple8", UCMPLE8) /* <= insn, cf. scmple8.  */
+  NDS32_BUILTIN(ucmple8, "v_ucmple8", V_UCMPLE8)
+  NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16)
+  NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16)
+  NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16)
+  NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16)
+  NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16)
+  NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16)
+  NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16)
+  NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16)
+  NDS32_BUILTIN(khm16, "khm16", KHM16)
+  NDS32_BUILTIN(khm16, "v_khm16", V_KHM16)
+  NDS32_BUILTIN(khmx16, "khmx16", KHMX16)
+  NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16)
+  NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8)
+  NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8)
+  NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8)
+  NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8)
+  NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8)
+  NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8)
+  NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8)
+  NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8)
+  NDS32_BUILTIN(raddsi3, "raddw", RADDW)
+  NDS32_BUILTIN(uraddsi3, "uraddw", URADDW)
+  NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW)
+  NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW)
+  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
+  NDS32_BUILTIN(kssl, "ksll", KSLL)
+  NDS32_BUILTIN(pkbb, "pkbb16", PKBB16)
+  NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16)
+  NDS32_BUILTIN(pkbt, "pkbt16", PKBT16)
+  NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16)
+  NDS32_BUILTIN(pktb, "pktb16", PKTB16)
+  NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16)
+  NDS32_BUILTIN(pktt, "pktt16", PKTT16)
+  NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16)
+  NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL)
+  NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U)
+  NDS32_BUILTIN(smmwb, "smmwb", SMMWB)
+  NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB)
+  NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U)
+  NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U)
+  NDS32_BUILTIN(smmwt, "smmwt", SMMWT)
+  NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT)
+  NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U)
+  NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U)
+  NDS32_BUILTIN(smbb, "smbb", SMBB)
+  NDS32_BUILTIN(smbb, "v_smbb", V_SMBB)
+  NDS32_BUILTIN(smbt, "smbt", SMBT)
+  NDS32_BUILTIN(smbt, "v_smbt", V_SMBT)
+  NDS32_BUILTIN(smtt, "smtt", SMTT)
+  NDS32_BUILTIN(smtt, "v_smtt", V_SMTT)
+  NDS32_BUILTIN(kmda, "kmda", KMDA)
+  NDS32_BUILTIN(kmda, "v_kmda", V_KMDA)
+  NDS32_BUILTIN(kmxda, "kmxda", KMXDA)
+  NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA)
+  NDS32_BUILTIN(smds, "smds", SMDS)
+  NDS32_BUILTIN(smds, "v_smds", V_SMDS)
+  NDS32_BUILTIN(smdrs, "smdrs", SMDRS)
+  NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS)
+  NDS32_BUILTIN(smxdsv, "smxds", SMXDS)
+  NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS)
+  NDS32_BUILTIN(smal1, "smal", SMAL)
+  NDS32_BUILTIN(smal1, "v_smal", V_SMAL)
+  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
+  NDS32_BUILTIN(wext, "wext", WEXT)
+  NDS32_BUILTIN(adddi3, "sadd64", SADD64)
+  NDS32_BUILTIN(adddi3, "uadd64", UADD64)
+  NDS32_BUILTIN(radddi3, "radd64", RADD64)
+  NDS32_BUILTIN(uradddi3, "uradd64", URADD64)
+  NDS32_BUILTIN(kadddi3, "kadd64", KADD64)
+  NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64)
+  NDS32_BUILTIN(subdi3, "ssub64", SSUB64)
+  NDS32_BUILTIN(subdi3, "usub64", USUB64)
+  NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64)
+  NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64)
+  NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64)
+  NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64)
+  NDS32_BUILTIN(smul16, "smul16", SMUL16)
+  NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16)
+  NDS32_BUILTIN(smulx16, "smulx16", SMULX16)
+  NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16)
+  NDS32_BUILTIN(umul16, "umul16", UMUL16)
+  NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16)
+  NDS32_BUILTIN(umulx16, "umulx16", UMULX16)
+  NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16)
+  NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL)
+  NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
+			  "put_unaligned_u16x2", UASTORE_U16)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi,
+			  "put_unaligned_s16x2", UASTORE_S16)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8)
+  NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8)
 };
 
 /* Two-argument intrinsics with an immediate second argument.  */
@@ -617,6 +860,22 @@ static struct builtin_description bdesc_2argimm[] =
   NDS32_BUILTIN(unspec_clips, "clips", CLIPS)
   NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ)
   NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ)
+  NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16)
+  NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16)
+  NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U)
+  NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U)
+  NDS32_BUILTIN(kslli16, "ksll16", KSLL16)
+  NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16)
+  NDS32_BUILTIN(sclip16, "sclip16", SCLIP16)
+  NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16)
+  NDS32_BUILTIN(uclip16, "uclip16", UCLIP16)
+  NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16)
+  NDS32_BUILTIN(sraiu, "sra_u", SRA_U)
+  NDS32_BUILTIN(kssl, "ksll", KSLL)
+  NDS32_BUILTIN(bitrev, "bitrev", BITREV)
+  NDS32_BUILTIN(wext, "wext", WEXT)
+  NDS32_BUILTIN(uclip32, "uclip32", UCLIP32)
+  NDS32_BUILTIN(sclip32, "sclip32", SCLIP32)
 };
 
 /* Intrinsics that take three arguments.  */
@@ -625,6 +884,67 @@ static struct builtin_description bdesc_3arg[] =
   NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA)
   NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE)
   NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP)
+  NDS32_BUILTIN(kmabb, "kmabb", KMABB)
+  NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB)
+  NDS32_BUILTIN(kmabt, "kmabt", KMABT)
+  NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT)
+  NDS32_BUILTIN(kmatt, "kmatt", KMATT)
+  NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT)
+  NDS32_BUILTIN(kmada, "kmada", KMADA)
+  NDS32_BUILTIN(kmada, "v_kmada", V_KMADA)
+  NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA)
+  NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA)
+  NDS32_BUILTIN(kmads, "kmads", KMADS)
+  NDS32_BUILTIN(kmads, "v_kmads", V_KMADS)
+  NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS)
+  NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS)
+  NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS)
+  NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS)
+  NDS32_BUILTIN(kmsda, "kmsda", KMSDA)
+  NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA)
+  NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA)
+  NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA)
+  NDS32_BUILTIN(bpick1, "bpick", BPICK)
+  NDS32_BUILTIN(smar64_1, "smar64", SMAR64)
+  NDS32_BUILTIN(smsr64, "smsr64", SMSR64)
+  NDS32_BUILTIN(umar64_1, "umar64", UMAR64)
+  NDS32_BUILTIN(umsr64, "umsr64", UMSR64)
+  NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64)
+  NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64)
+  NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64)
+  NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64)
+  NDS32_BUILTIN(smalbb, "smalbb", SMALBB)
+  NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB)
+  NDS32_BUILTIN(smalbt, "smalbt", SMALBT)
+  NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT)
+  NDS32_BUILTIN(smaltt, "smaltt", SMALTT)
+  NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT)
+  NDS32_BUILTIN(smalda1, "smalda", SMALDA)
+  NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA)
+  NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA)
+  NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA)
+  NDS32_BUILTIN(smalds1, "smalds", SMALDS)
+  NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS)
+  NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS)
+  NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS)
+  NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS)
+  NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS)
+  NDS32_BUILTIN(smslda1, "smslda", SMSLDA)
+  NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA)
+  NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA)
+  NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA)
+  NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB)
+  NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB)
+  NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U)
+  NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U)
+  NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT)
+  NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT)
+  NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U)
+  NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U)
+  NDS32_BUILTIN(kmmac, "kmmac", KMMAC)
+  NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U)
+  NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB)
+  NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U)
 };
 
 /* Three-argument intrinsics with an immediate third argument.  */
@@ -634,6 +954,7 @@ static struct builtin_description bdesc_3argimm[] =
   NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW)
   NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W)
   NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW)
+  NDS32_BUILTIN(insb, "insb", INSB)
 };
 
 /* Intrinsics that load a value.  */
@@ -676,6 +997,11 @@ nds32_expand_builtin_impl (tree exp,
   unsigned i;
   struct builtin_description *d;
 
+  if (!NDS32_EXT_DSP_P ()
+      && fcode > NDS32_BUILTIN_DSP_BEGIN
+      && fcode < NDS32_BUILTIN_DSP_END)
+    error ("don%'t support DSP extension instructions");
+
   switch (fcode)
     {
     /* FPU Register Transfer.  */
@@ -812,6 +1138,9 @@ nds32_expand_builtin_impl (tree exp,
     case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL:
       emit_insn (gen_cctl_l1d_wball_one_lvl());
       return target;
+    case NDS32_BUILTIN_CLROV:
+      emit_insn (gen_unspec_volatile_clrov ());
+      return target;
     case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT:
       emit_insn (gen_unspec_standby_no_wake_grant ());
       return target;
@@ -947,10 +1276,18 @@ nds32_init_builtins_impl (void)
 			NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE)
 
   /* Looking for return type and argument can be found in tree.h file.  */
+  tree ptr_char_type_node = build_pointer_type (char_type_node);
   tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node);
   tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node);
+  tree ptr_short_type_node = build_pointer_type (short_integer_type_node);
   tree ptr_uint_type_node = build_pointer_type (unsigned_type_node);
   tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node);
+  tree v4qi_type_node = build_vector_type (intQI_type_node, 4);
+  tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4);
+  tree v2hi_type_node = build_vector_type (intHI_type_node, 2);
+  tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2);
+  tree v2si_type_node = build_vector_type (intSI_type_node, 2);
+  tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2);
 
   /* Cache.  */
   ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC);
@@ -1050,6 +1387,31 @@ nds32_init_builtins_impl (void)
   ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM);
   ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM);
 
+  /* SATURATION  */
+  ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW);
+  ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW);
+  ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH);
+  ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH);
+  ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB);
+  ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB);
+  ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT);
+  ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT);
+  ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB);
+  ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB);
+  ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT);
+  ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT);
+  ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB);
+  ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB);
+  ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT);
+  ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT);
+  ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB);
+  ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB);
+  ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT);
+  ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT);
+  ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW);
+  ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U);
+  ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV);
+  ADD_NDS32_BUILTIN0 ("clrov", void, CLROV);
 
   /* ROTR  */
   ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR);
@@ -1109,4 +1471,384 @@ nds32_init_builtins_impl (void)
   ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED);
   ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED);
 
+  /* DSP Extension: SIMD 16bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16);
+  ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16);
+  ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16);
+  ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16);
+  ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16);
+  ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16);
+  ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16);
+  ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16);
+  ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16);
+  ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16);
+  ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16);
+  ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16);
+  ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16);
+  ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16);
+  ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16);
+  ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16);
+  ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16);
+  ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16);
+  ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16);
+  ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16);
+  ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16);
+  ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16);
+  ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16);
+  ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16);
+  ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16);
+  ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16);
+  ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16);
+  ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16);
+  ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16);
+  ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16);
+  ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16);
+  ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16);
+  ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16);
+  ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16);
+  ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16);
+  ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16);
+
+  /* DSP Extension: SIMD 8bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8);
+  ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8);
+  ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8);
+  ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8);
+  ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8);
+  ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8);
+  ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8);
+  ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8);
+  ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8);
+  ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8);
+  ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8);
+  ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8);
+  ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8);
+  ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8);
+  ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8);
+  ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8);
+  ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8);
+  ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8);
+  ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8);
+  ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8);
+  ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8);
+  ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8);
+
+  /* DSP Extension: SIMD 16bit Shift.  */
+  ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16);
+  ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16);
+  ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U);
+  ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U);
+  ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16);
+  ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16);
+  ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U);
+  ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U);
+  ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16);
+  ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16);
+  ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16);
+  ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16);
+  ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16);
+  ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16);
+  ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U);
+  ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U);
+
+  /* DSP Extension: 16bit Compare.  */
+  ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16);
+  ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16);
+  ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16);
+  ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16);
+  ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16);
+  ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16);
+  ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16);
+  ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16);
+  ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16);
+  ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16);
+  ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16);
+
+  /* DSP Extension: 8bit Compare.  */
+  ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8);
+  ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8);
+  ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8);
+  ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8);
+  ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8);
+  ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8);
+  ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8);
+  ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8);
+  ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8);
+  ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8);
+  ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8);
+
+  /* DSP Extension: SIMD 16bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16);
+  ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16);
+  ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16);
+  ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16);
+  ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16);
+  ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16);
+  ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16);
+  ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16);
+  ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16);
+  ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16);
+  ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16);
+  ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16);
+  ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16);
+  ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16);
+  ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16);
+  ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16);
+  ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16);
+  ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16);
+  ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16);
+  ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16);
+  ADD_NDS32_BUILTIN2 ("smulx16",
+		      long_long_unsigned, unsigned, unsigned, SMULX16);
+  ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16);
+  ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16);
+  ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16);
+  ADD_NDS32_BUILTIN2 ("umulx16",
+		      long_long_unsigned, unsigned, unsigned, UMULX16);
+  ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16);
+
+  /* DSP Extension: SIMD 8bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8);
+  ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8);
+  ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8);
+  ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8);
+  ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8);
+  ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8);
+  ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8);
+  ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8);
+  ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8);
+  ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8);
+
+  /* DSP Extension: 8bit Unpacking.  */
+  ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810);
+  ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820);
+  ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830);
+  ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831);
+  ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831);
+  ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810);
+  ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820);
+  ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830);
+  ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831);
+  ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831);
+
+  /* DSP Extension: 32bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW);
+  ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW);
+  ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW);
+  ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW);
+
+  /* DSP Extension: 32bit Shift.  */
+  ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U);
+  ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL);
+
+  /* DSP Extension: 16bit Packing.  */
+  ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16);
+  ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16);
+  ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16);
+  ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16);
+  ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16);
+  ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16);
+  ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16);
+  ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16);
+
+  /* DSP Extension: Signed MSW 32x32 Multiply and ADD.  */
+  ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL);
+  ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U);
+  ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC);
+  ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U);
+  ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB);
+  ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U);
+  ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL);
+  ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U);
+
+  /* DSP Extension: Most Significant Word 32x16 Multiply and ADD.  */
+  ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB);
+  ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB);
+  ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U);
+  ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U);
+  ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT);
+  ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT);
+  ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U);
+  ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U);
+  ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB);
+  ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB);
+  ADD_NDS32_BUILTIN3 ("kmmawb_u",
+		      integer, integer, integer, unsigned, KMMAWB_U);
+  ADD_NDS32_BUILTIN3 ("v_kmmawb_u",
+		      integer, integer, integer, v2hi, V_KMMAWB_U);
+  ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT);
+  ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT);
+  ADD_NDS32_BUILTIN3 ("kmmawt_u",
+		      integer, integer, integer, unsigned, KMMAWT_U);
+  ADD_NDS32_BUILTIN3 ("v_kmmawt_u",
+		      integer, integer, integer, v2hi, V_KMMAWT_U);
+
+  /* DSP Extension: Signed 16bit Multiply with ADD/Subtract.  */
+  ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB);
+  ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB);
+  ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT);
+  ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT);
+  ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT);
+  ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT);
+  ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA);
+  ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA);
+  ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA);
+  ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA);
+  ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS);
+  ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS);
+  ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS);
+  ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS);
+  ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS);
+  ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS);
+  ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB);
+  ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB);
+  ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT);
+  ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT);
+  ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT);
+  ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT);
+  ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA);
+  ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA);
+  ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA);
+  ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA);
+  ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS);
+  ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS);
+  ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS);
+  ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS);
+  ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS);
+  ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS);
+  ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA);
+  ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA);
+  ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA);
+  ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA);
+
+  /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract.  */
+  ADD_NDS32_BUILTIN2 ("smal", long_long_integer,
+		      long_long_integer, unsigned, SMAL);
+  ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer,
+		      long_long_integer, v2hi, V_SMAL);
+
+  /* DSP Extension: 32bit MISC.  */
+  ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV);
+  ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT);
+  ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK);
+  ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB);
+
+  /* DSP Extension: 64bit Add and Subtract.  */
+  ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer,
+		      long_long_integer, long_long_integer, SADD64);
+  ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, UADD64);
+  ADD_NDS32_BUILTIN2 ("radd64", long_long_integer,
+		      long_long_integer, long_long_integer, RADD64);
+  ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, URADD64);
+  ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer,
+		      long_long_integer, long_long_integer, KADD64);
+  ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, UKADD64);
+  ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer,
+		      long_long_integer, long_long_integer, SSUB64);
+  ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, USUB64);
+  ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer,
+		      long_long_integer, long_long_integer, RSUB64);
+  ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, URSUB64);
+  ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer,
+		      long_long_integer, long_long_integer, KSUB64);
+  ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned,
+		      long_long_unsigned, long_long_unsigned, UKSUB64);
+
+  /* DSP Extension: 32bit Multiply with 64bit Add/Subtract.  */
+  ADD_NDS32_BUILTIN3 ("smar64", long_long_integer,
+		      long_long_integer, integer, integer, SMAR64);
+  ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer,
+		      long_long_integer, integer, integer, SMSR64);
+  ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UMAR64);
+  ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UMSR64);
+  ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer,
+		      long_long_integer, integer, integer, KMAR64);
+  ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer,
+		      long_long_integer, integer, integer, KMSR64);
+  ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UKMAR64);
+  ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned,
+		      long_long_unsigned, unsigned, unsigned, UKMSR64);
+
+  /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract.  */
+  ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALBB);
+  ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALBB);
+  ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALBT);
+  ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALBT);
+  ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALTT);
+  ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALTT);
+  ADD_NDS32_BUILTIN3 ("smalda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALDA);
+  ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALDA);
+  ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALXDA);
+  ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALXDA);
+  ADD_NDS32_BUILTIN3 ("smalds", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALDS);
+  ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALDS);
+  ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALDRS);
+  ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALDRS);
+  ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMALXDS);
+  ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMALXDS);
+  ADD_NDS32_BUILTIN3 ("smslda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMSLDA);
+  ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMSLDA);
+  ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer,
+		      long_long_integer, unsigned, unsigned, SMSLXDA);
+  ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer,
+		      long_long_integer, v2hi, v2hi, V_SMSLXDA);
+
+  /* DSP Extension: augmented baseline.  */
+  ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32);
+  ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32);
+  ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS);
+
+  /* DSP Extension: vector type unaligned Load/Store.  */
+  ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8);
+  ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort,
+		      u_v2hi, UASTORE_U16);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short,
+		      v2hi, UASTORE_S16);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar,
+		      u_v4qi, UASTORE_U8);
+  ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char,
+		      v4qi, UASTORE_S8);
 }
diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md
index 24e7c0bf4a1..c70a6fcc99b 100644
--- a/gcc/config/nds32/nds32-intrinsic.md
+++ b/gcc/config/nds32/nds32-intrinsic.md
@@ -1037,6 +1037,187 @@
    (set_attr "length" "4")]
 )
 
+;; SATURATION
+
+(define_insn "unspec_kaddw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_plus:SI (match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "kaddw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_minus:SI (match_operand:SI 1 "register_operand" "r")
+		     (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "ksubw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kaddh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDH))]
+  ""
+  "kaddh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubh"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBH))]
+  ""
+  "ksubh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kaddh_dsp"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand:SI 2 "register_operand" "r"))
+		    (const_int 15)] UNSPEC_CLIPS))]
+  "NDS32_EXT_DSP_P ()"
+  "kaddh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_ksubh_dsp"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r")
+			      (match_operand:SI 2 "register_operand" "r"))
+		    (const_int 15)] UNSPEC_CLIPS))]
+  "NDS32_EXT_DSP_P ()"
+  "ksubh\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmbb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))]
+  ""
+  "kdmbb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmbt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))]
+  ""
+  "kdmbt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmtb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))]
+  ""
+  "kdmtb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kdmtt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))]
+  ""
+  "kdmtt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmbb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))]
+  ""
+  "khmbb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmbt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))]
+  ""
+  "khmbt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmtb"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))]
+  ""
+  "khmtb\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_khmtt"
+  [(set (match_operand:V2HI 0 "register_operand" "=r")
+	(unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r")
+		      (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))]
+  ""
+  "khmtt\t%0, %1, %2"
+  [(set_attr "type"    "mul")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kslraw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))]
+  ""
+  "kslraw\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_kslrawu"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+		    (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))]
+  ""
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_rdov"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))]
+  ""
+  "rdov\t%0"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
+(define_insn "unspec_volatile_clrov"
+  [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)]
+  ""
+  "clrov"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")]
+)
+
 ;; System
 
 (define_insn "unspec_sva"
@@ -1415,22 +1596,17 @@
   if (TARGET_ISA_V3M)
     nds32_expand_unaligned_store (operands, DImode);
   else
-    emit_insn (gen_unaligned_store_dw (operands[0], operands[1]));
+    emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]),
+				       operands[1]));
   DONE;
 })
 
 (define_insn "unaligned_store_dw"
-  [(set (mem:DI (match_operand:SI 0 "register_operand" "r"))
-	(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))]
+  [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand"   "=Umw")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "   r")] UNSPEC_UASTORE_DW))]
   ""
 {
-  rtx otherops[3];
-  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1]));
-  otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
-  otherops[2] = operands[0];
-
-  output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops);
-  return "";
+  return nds32_output_smw_double_word (operands);
 }
   [(set_attr "type"   "store")
    (set_attr "length"     "4")]
@@ -1495,4 +1671,15 @@
   DONE;
 })
 
+;; abs alias kabs
+
+(define_insn "unspec_kabs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))]
+  ""
+  "kabs\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c
index 2c3aac7a256..db67a0e3666 100644
--- a/gcc/config/nds32/nds32-isr.c
+++ b/gcc/config/nds32/nds32-isr.c
@@ -43,7 +43,260 @@
    We use an array to record essential information for each vector.  */
 static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS];
 
-/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------- */
+/* FIXME:
+   FOR BACKWARD COMPATIBILITY, we need to support following patterns:
+
+       __attribute__((interrupt("XXX;YYY;id=ZZZ")))
+       __attribute__((exception("XXX;YYY;id=ZZZ")))
+       __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
+
+   We provide several functions to parse the strings.  */
+
+/* Parse the legacy interrupt attribute string ORIGINAL_STR, such as
+   "XXX;YYY;id=ZZZ", and record FUNC_NAME, the save/nested settings and
+   S_LEVEL in nds32_isr_vectors[] for every listed interrupt id.  */
+static void
+nds32_interrupt_attribute_parse_string (const char *original_str,
+					const char *func_name,
+					unsigned int s_level)
+{
+  char target_str[100];
+  enum nds32_isr_save_reg save_reg;
+  enum nds32_isr_nested_type nested_type;
+  char *id_str, *value_str;
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  Reject a string that does not
+     fit instead of overflowing the fixed-size buffer.  */
+  if (strlen (original_str) >= sizeof (target_str))
+    {
+      error ("interrupt attribute string is too long");
+      return;
+    }
+  strcpy (target_str, original_str);
+
+  /* 1. Detect 'save_all_regs'    : NDS32_SAVE_ALL
+	       'save_caller_regs' : NDS32_PARTIAL_SAVE
+     Note that if no argument is found,
+     use NDS32_PARTIAL_SAVE by default.  */
+  if (strstr (target_str, "save_all_regs"))
+    save_reg = NDS32_SAVE_ALL;
+  else
+    save_reg = NDS32_PARTIAL_SAVE;
+
+  /* 2. Detect 'nested'       : NDS32_NESTED
+	       'not_nested'   : NDS32_NOT_NESTED
+	       'ready_nested' : NDS32_NESTED_READY
+	       'critical'     : NDS32_CRITICAL
+     Note that if no argument is found,
+     use NDS32_NOT_NESTED by default.
+     Also, since 'not_nested' and 'ready_nested' both contain
+     'nested' string, we check 'nested' with lowest priority.  */
+  if (strstr (target_str, "not_nested"))
+    nested_type = NDS32_NOT_NESTED;
+  else if (strstr (target_str, "ready_nested"))
+    nested_type = NDS32_NESTED_READY;
+  else if (strstr (target_str, "nested"))
+    nested_type = NDS32_NESTED;
+  else if (strstr (target_str, "critical"))
+    nested_type = NDS32_CRITICAL;
+  else
+    nested_type = NDS32_NOT_NESTED;
+
+  /* 3. Traverse each id value and set corresponding information.  */
+  id_str = strstr (target_str, "id=");
+
+  /* If user forgets to assign 'id', issue an error message and stop:
+     error () returns to us, and there is nothing left to tokenize.  */
+  if (id_str == NULL)
+    {
+      error ("require id argument in the string");
+      return;
+    }
+
+  /* Extract the value_str first.  */
+  id_str    = strtok (id_str, "=");
+  value_str = strtok (NULL, ";");
+
+  /* Pick up the first id value token.  */
+  value_str = strtok (value_str, ",");
+  while (value_str != NULL)
+    {
+      /* For interrupt(0..63), the actual vector number is (9..72).  */
+      int i = atoi (value_str) + 9;
+
+      if (i < 9 || i > 72)
+	error ("invalid id value for interrupt attribute");
+      else
+	{
+	  /* Setup nds32_isr_vectors[] array.  Only done for a valid id,
+	     so that a bad id never indexes outside the expected range.  */
+	  nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT;
+	  strcpy (nds32_isr_vectors[i].func_name, func_name);
+	  nds32_isr_vectors[i].save_reg = save_reg;
+	  nds32_isr_vectors[i].nested_type = nested_type;
+	  nds32_isr_vectors[i].security_level = s_level;
+	}
+
+      /* Fetch next token.  */
+      value_str = strtok (NULL, ",");
+    }
+}
+
+/* Parse the legacy exception attribute string ORIGINAL_STR and record
+   FUNC_NAME, save/nested settings and S_LEVEL for each listed id.  */
+static void
+nds32_exception_attribute_parse_string (const char *original_str,
+					const char *func_name,
+					unsigned int s_level)
+{
+  char target_str[100];
+  enum nds32_isr_save_reg save_reg;
+  enum nds32_isr_nested_type nested_type;
+  char *id_str, *value_str;
+
+  /* Copy original string into a character array so that
+     the string APIs can handle it.  Reject a string that does not
+     fit instead of overflowing the fixed-size buffer.  */
+  if (strlen (original_str) >= sizeof (target_str))
+    {
+      error ("exception attribute string is too long");
+      return;
+    }
+  strcpy (target_str, original_str);
+
+  /* 1. Detect 'save_all_regs'    : NDS32_SAVE_ALL
+	       'save_caller_regs' : NDS32_PARTIAL_SAVE
+     Note that if no argument is found,
+     use NDS32_PARTIAL_SAVE by default.  */
+  if (strstr (target_str, "save_all_regs"))
+    save_reg = NDS32_SAVE_ALL;
+  else
+    save_reg = NDS32_PARTIAL_SAVE;
+
+  /* 2. Detect 'nested'       : NDS32_NESTED
+	       'not_nested'   : NDS32_NOT_NESTED
+	       'ready_nested' : NDS32_NESTED_READY
+	       'critical'     : NDS32_CRITICAL
+     Note that if no argument is found,
+     use NDS32_NOT_NESTED by default.
+     Also, since 'not_nested' and 'ready_nested' both contain
+     'nested' string, we check 'nested' with lowest priority.  */
+  if (strstr (target_str, "not_nested"))
+    nested_type = NDS32_NOT_NESTED;
+  else if (strstr (target_str, "ready_nested"))
+    nested_type = NDS32_NESTED_READY;
+  else if (strstr (target_str, "nested"))
+    nested_type = NDS32_NESTED;
+  else if (strstr (target_str, "critical"))
+    nested_type = NDS32_CRITICAL;
+  else
+    nested_type = NDS32_NOT_NESTED;
+
+  /* 3. Traverse each id value and set corresponding information.  */
+  id_str = strstr (target_str, "id=");
+
+  /* If user forgets to assign 'id', issue an error message and stop:
+     error () returns to us, and there is nothing left to tokenize.  */
+  if (id_str == NULL)
+    {
+      error ("require id argument in the string");
+      return;
+    }
+
+  /* Extract the value_str first.  */
+  id_str    = strtok (id_str, "=");
+  value_str = strtok (NULL, ";");
+
+  /* Pick up the first id value token.  */
+  value_str = strtok (value_str, ",");
+  while (value_str != NULL)
+    {
+      /* For exception(1..8), the actual vector number is (1..8).  */
+      int i = atoi (value_str);
+
+      if (i < 1 || i > 8)
+	error ("invalid id value for exception attribute");
+      else
+	{
+	  /* Setup nds32_isr_vectors[] array.  Only done for a valid id,
+	     so that a bad id never indexes outside the expected range.  */
+	  nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION;
+	  strcpy (nds32_isr_vectors[i].func_name, func_name);
+	  nds32_isr_vectors[i].save_reg = save_reg;
+	  nds32_isr_vectors[i].nested_type = nested_type;
+	  nds32_isr_vectors[i].security_level = s_level;
+	}
+
+      /* Fetch next token.  */
+      value_str = strtok (NULL, ",");
+    }
+}
+
+/* Parse the legacy reset attribute string ORIGINAL_STR and record the
+   result together with FUNC_NAME in nds32_isr_vectors[0].  */
+static void
+nds32_reset_attribute_parse_string (const char *original_str,
+				    const char *func_name)
+{
+  char target_str[100];
+  char *vectors_str, *nmi_str, *warm_str, *value_str;
+
+  /* Reject a string that does not fit into target_str instead of
+     overflowing the fixed-size buffer.  */
+  if (strlen (original_str) >= sizeof (target_str))
+    {
+      error ("reset attribute string is too long");
+      return;
+    }
+
+  /* Deal with reset attribute.  Its vector number is always 0.  */
+  nds32_isr_vectors[0].category = NDS32_ISR_RESET;
+  strcpy (nds32_isr_vectors[0].func_name, func_name);
+
+  /* 1. Parse 'vectors=XXXX'.
+     The total vectors = interrupt + exception numbers + reset.
+     There are 8 exception and 1 reset in nds32 architecture.
+     If user forgets to assign 'vectors', use default 16 interrupts.  */
+  nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1;
+
+  /* strtok () modifies its input, so work on a copy of the string
+     and make a fresh copy before each of the three steps.  */
+  strcpy (target_str, original_str);
+  vectors_str = strstr (target_str, "vectors=");
+  if (vectors_str != NULL)
+    {
+      /* Extract the value_str.  It is NULL when nothing follows '='.  */
+      vectors_str = strtok (vectors_str, "=");
+      value_str  = strtok (NULL, ";");
+      if (value_str != NULL)
+	nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1;
+    }
+
+  /* 2. Parse 'nmi_func=YYYY'.  */
+  strcpy (target_str, original_str);
+  nmi_str = strstr (target_str, "nmi_func=");
+  if (nmi_str != NULL)
+    {
+      nmi_str = strtok (nmi_str, "=");
+      value_str  = strtok (NULL, ";");
+      if (value_str != NULL)
+	strcpy (nds32_isr_vectors[0].nmi_name, value_str);
+    }
+
+  /* 3. Parse 'warm_func=ZZZZ'.  */
+  strcpy (target_str, original_str);
+  warm_str = strstr (target_str, "warm_func=");
+  if (warm_str != NULL)
+    {
+      warm_str = strtok (warm_str, "=");
+      value_str  = strtok (NULL, ";");
+      if (value_str != NULL)
+	strcpy (nds32_isr_vectors[0].warm_name, value_str);
+    }
+}
+/* ------------------------------------------------------------- */
 
 /* A helper function to emit section head template.  */
 static void
@@ -79,6 +332,15 @@ nds32_emit_isr_jmptbl_section (int vector_id)
   char section_name[100];
   char symbol_name[100];
 
+  /* A critical isr does not need jump table section because
+     its behavior is not performed by two-level handler.  */
+  if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL)
+    {
+      fprintf (asm_out_file, "\t! The vector %02d is a critical isr !\n",
+			     vector_id);
+      return;
+    }
+
   /* Prepare jmptbl section and symbol name.  */
   snprintf (section_name, sizeof (section_name),
 	    ".nds32_jmptbl.%02d", vector_id);
@@ -99,7 +361,6 @@ nds32_emit_isr_vector_section (int vector_id)
   const char *c_str = "CATEGORY";
   const char *sr_str = "SR";
   const char *nt_str = "NT";
-  const char *vs_str = "VS";
   char first_level_handler_name[100];
   char section_name[100];
   char symbol_name[100];
@@ -147,30 +408,47 @@ nds32_emit_isr_vector_section (int vector_id)
     case NDS32_NESTED_READY:
       nt_str = "nr";
       break;
+    case NDS32_CRITICAL:
+      /* The critical isr is not performed by two-level handler.  */
+      nt_str = "";
+      break;
     }
 
-  /* Currently we have 4-byte or 16-byte size for each vector.
-     If it is 4-byte, the first level handler name has suffix string "_4b".  */
-  vs_str = (nds32_isr_vector_size == 4) ? "_4b" : "";
-
   /* Now we can create first level handler name.  */
-  snprintf (first_level_handler_name, sizeof (first_level_handler_name),
-	    "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str);
+  if (nds32_isr_vectors[vector_id].security_level == 0)
+    {
+      /* For security level 0, use normal first level handler name.  */
+      snprintf (first_level_handler_name, sizeof (first_level_handler_name),
+		"_nds32_%s_%s_%s", c_str, sr_str, nt_str);
+    }
+  else
+    {
+      /* For security level 1-3, use corresponding spl_1, spl_2, or spl_3.  */
+      snprintf (first_level_handler_name, sizeof (first_level_handler_name),
+		"_nds32_spl_%d", nds32_isr_vectors[vector_id].security_level);
+    }
 
   /* Prepare vector section and symbol name.  */
   snprintf (section_name, sizeof (section_name),
 	    ".nds32_vector.%02d", vector_id);
   snprintf (symbol_name, sizeof (symbol_name),
-	    "_nds32_vector_%02d%s", vector_id, vs_str);
+	    "_nds32_vector_%02d", vector_id);
 
 
   /* Everything is ready.  We can start emit vector section content.  */
   nds32_emit_section_head_template (section_name, symbol_name,
 				    floor_log2 (nds32_isr_vector_size), false);
 
-  /* According to the vector size, the instructions in the
-     vector section may be different.  */
-  if (nds32_isr_vector_size == 4)
+  /* First we check if it is a critical isr.
+     If so, jump to user handler directly; otherwise, the instructions
+     in the vector section may be different according to the vector size.  */
+  if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL)
+    {
+      /* This block is for critical isr.  Jump to user handler directly.  */
+      fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n",
+			     nds32_isr_vectors[vector_id].func_name);
+    }
+  else if (nds32_isr_vector_size == 4)
     {
       /* This block is for 4-byte vector size.
 	 Hardware $VID support is necessary and only one instruction
@@ -239,13 +517,11 @@ nds32_emit_isr_reset_content (void)
 {
   unsigned int i;
   unsigned int total_n_vectors;
-  const char *vs_str;
   char reset_handler_name[100];
   char section_name[100];
   char symbol_name[100];
 
   total_n_vectors = nds32_isr_vectors[0].total_n_vectors;
-  vs_str = (nds32_isr_vector_size == 4) ? "_4b" : "";
 
   fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n");
 
@@ -261,7 +537,7 @@ nds32_emit_isr_reset_content (void)
   /* Emit vector references.  */
   fprintf (asm_out_file, "\t ! references to vector section entries\n");
   for (i = 0; i < total_n_vectors; i++)
-    fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str);
+    fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i);
 
   /* Emit jmptbl_00 section.  */
   snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00");
@@ -275,9 +551,9 @@ nds32_emit_isr_reset_content (void)
 
   /* Emit vector_00 section.  */
   snprintf (section_name, sizeof (section_name), ".nds32_vector.00");
-  snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str);
+  snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00");
   snprintf (reset_handler_name, sizeof (reset_handler_name),
-	    "_nds32_reset%s", vs_str);
+	    "_nds32_reset");
 
   fprintf (asm_out_file, "\t! ....................................\n");
   nds32_emit_section_head_template (section_name, symbol_name,
@@ -323,12 +599,12 @@ void
 nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
 {
   int save_all_p, partial_save_p;
-  int nested_p, not_nested_p, nested_ready_p;
+  int nested_p, not_nested_p, nested_ready_p, critical_p;
   int intr_p, excp_p, reset_p;
 
   /* Initialize variables.  */
   save_all_p = partial_save_p = 0;
-  nested_p = not_nested_p = nested_ready_p = 0;
+  nested_p = not_nested_p = nested_ready_p = critical_p = 0;
   intr_p = excp_p = reset_p = 0;
 
   /* We must check at MOST one attribute to set save-reg.  */
@@ -347,8 +623,10 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
     not_nested_p = 1;
   if (lookup_attribute ("nested_ready", func_attrs))
     nested_ready_p = 1;
+  if (lookup_attribute ("critical", func_attrs))
+    critical_p = 1;
 
-  if ((nested_p + not_nested_p + nested_ready_p) > 1)
+  if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1)
     error ("multiple nested types attributes to function %qD", func_decl);
 
   /* We must check at MOST one attribute to
@@ -362,6 +640,17 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs)
 
   if ((intr_p + excp_p + reset_p) > 1)
     error ("multiple interrupt attributes to function %qD", func_decl);
+
+  /* Do not allow isr attributes under linux toolchain.  */
+  if (TARGET_LINUX_ABI && intr_p)
+      error ("cannot use interrupt attributes to function %qD "
+	     "under linux toolchain", func_decl);
+  if (TARGET_LINUX_ABI && excp_p)
+      error ("cannot use exception attributes to function %qD "
+	     "under linux toolchain", func_decl);
+  if (TARGET_LINUX_ABI && reset_p)
+      error ("cannot use reset attributes to function %qD "
+	     "under linux toolchain", func_decl);
 }
 
 /* Function to construct isr vectors information array.
@@ -373,15 +662,21 @@ nds32_construct_isr_vectors_information (tree func_attrs,
 					 const char *func_name)
 {
   tree save_all, partial_save;
-  tree nested, not_nested, nested_ready;
+  tree nested, not_nested, nested_ready, critical;
   tree intr, excp, reset;
 
+  tree secure;
+  tree security_level_list;
+  tree security_level;
+  unsigned int s_level;
+
   save_all     = lookup_attribute ("save_all", func_attrs);
   partial_save = lookup_attribute ("partial_save", func_attrs);
 
   nested       = lookup_attribute ("nested", func_attrs);
   not_nested   = lookup_attribute ("not_nested", func_attrs);
   nested_ready = lookup_attribute ("nested_ready", func_attrs);
+  critical     = lookup_attribute ("critical", func_attrs);
 
   intr  = lookup_attribute ("interrupt", func_attrs);
   excp  = lookup_attribute ("exception", func_attrs);
@@ -391,6 +686,63 @@ nds32_construct_isr_vectors_information (tree func_attrs,
   if (!intr && !excp && !reset)
     return;
 
+  /* At first, we need to retrieve security level.  */
+  secure = lookup_attribute ("secure", func_attrs);
+  if (secure != NULL)
+    {
+      security_level_list = TREE_VALUE (secure);
+      security_level = TREE_VALUE (security_level_list);
+      s_level = TREE_INT_CST_LOW (security_level);
+    }
+  else
+    {
+      /* If there is no secure attribute, the security level is set by
+	 nds32_isr_secure_level, which is controlled by -misr-secure=X option.
+	 By default nds32_isr_secure_level should be 0.  */
+      s_level = nds32_isr_secure_level;
+    }
+
+  /* ------------------------------------------------------------- */
+  /* FIXME:
+     FOR BACKWARD COMPATIBILITY, we need to support following patterns:
+
+	 __attribute__((interrupt("XXX;YYY;id=ZZZ")))
+	 __attribute__((exception("XXX;YYY;id=ZZZ")))
+	 __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
+
+     If interrupt/exception/reset appears and its argument is a
+     STRING_CST, we will parse string with some auxiliary functions
+     which set necessary isr information in the nds32_isr_vectors[] array.
+     After that, we can return immediately to avoid new-syntax isr
+     information construction.  */
+  if (intr != NULL_TREE
+      && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (intr));
+      nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg),
+					      func_name,
+					      s_level);
+      return;
+    }
+  if (excp != NULL_TREE
+      && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (excp));
+      nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg),
+					      func_name,
+					      s_level);
+      return;
+    }
+  if (reset != NULL_TREE
+      && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (reset));
+      nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg),
+					  func_name);
+      return;
+    }
+  /* ------------------------------------------------------------- */
+
   /* If we are here, either we have interrupt/exception,
      or reset attribute.  */
   if (intr || excp)
@@ -417,6 +769,9 @@ nds32_construct_isr_vectors_information (tree func_attrs,
 	  /* Add vector_number_offset to get actual vector number.  */
 	  vector_id = TREE_INT_CST_LOW (id) + vector_number_offset;
 
+	  /* Set security level.  */
+	  nds32_isr_vectors[vector_id].security_level = s_level;
+
 	  /* Enable corresponding vector and set function name.  */
 	  nds32_isr_vectors[vector_id].category = (intr)
 						  ? (NDS32_ISR_INTERRUPT)
@@ -436,6 +791,8 @@ nds32_construct_isr_vectors_information (tree func_attrs,
 	    nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED;
 	  else if (nested_ready)
 	    nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY;
+	  else if (critical)
+	    nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL;
 
 	  /* Advance to next id.  */
 	  id_list = TREE_CHAIN (id_list);
@@ -492,7 +849,6 @@ nds32_construct_isr_vectors_information (tree func_attrs,
     }
 }
 
-/* A helper function to handle isr stuff at the beginning of asm file.  */
 void
 nds32_asm_file_start_for_isr (void)
 {
@@ -505,15 +861,14 @@ nds32_asm_file_start_for_isr (void)
       strcpy (nds32_isr_vectors[i].func_name, "");
       nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE;
       nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED;
+      nds32_isr_vectors[i].security_level = 0;
       nds32_isr_vectors[i].total_n_vectors = 0;
       strcpy (nds32_isr_vectors[i].nmi_name, "");
       strcpy (nds32_isr_vectors[i].warm_name, "");
     }
 }
 
-/* A helper function to handle isr stuff at the end of asm file.  */
-void
-nds32_asm_file_end_for_isr (void)
+void nds32_asm_file_end_for_isr (void)
 {
   int i;
 
@@ -547,6 +902,8 @@ nds32_asm_file_end_for_isr (void)
 	  /* Found one vector which is interupt or exception.
 	     Output its jmptbl and vector section content.  */
 	  fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i);
+	  fprintf (asm_out_file, "\t! security level: %d\n",
+		   nds32_isr_vectors[i].security_level);
 	  fprintf (asm_out_file, "\t! ------------------------------------\n");
 	  nds32_emit_isr_jmptbl_section (i);
 	  fprintf (asm_out_file, "\t! ....................................\n");
@@ -580,4 +937,65 @@ nds32_isr_function_p (tree func)
 	  || (t_reset != NULL_TREE));
 }
 
-/* ------------------------------------------------------------------------ */
+/* Return true if FUNC is an isr function with critical attribute.
+
+   FUNC must be a FUNCTION_DECL.  It is critical when it carries an
+   interrupt or exception attribute and either a separate critical
+   attribute or, in the legacy string syntax, an interrupt/exception
+   string argument that contains "critical".  */
+bool
+nds32_isr_function_critical_p (tree func)
+{
+  tree t_intr;
+  tree t_excp;
+  tree t_critical;
+  tree t_check;
+
+  tree attrs;
+
+  if (TREE_CODE (func) != FUNCTION_DECL)
+    abort ();
+
+  attrs = DECL_ATTRIBUTES (func);
+
+  t_intr  = lookup_attribute ("interrupt", attrs);
+  t_excp  = lookup_attribute ("exception", attrs);
+
+  t_critical = lookup_attribute ("critical", attrs);
+
+  /* If both interrupt and exception attribute does not appear,
+     we can return false immediately.  */
+  if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE))
+    return false;
+
+  /* Here we can guarantee either interrupt or exception attribute
+     does exist, so further check critical attribute.
+     If it also appears, we can return true.  */
+  if (t_critical != NULL_TREE)
+    return true;
+
+  /* ------------------------------------------------------------- */
+  /* FIXME:
+     FOR BACKWARD COMPATIBILITY, we need to handle string type.
+     If the string 'critical' appears in the interrupt/exception
+     string argument, we can return true.  */
+  /* Only one of the two attributes is inspected: interrupt if present,
+     otherwise exception.  */
+  t_check = t_intr ? t_intr : t_excp;
+  if (TREE_CODE (TREE_VALUE (TREE_VALUE (t_check))) == STRING_CST)
+    {
+      tree string_arg = TREE_VALUE (TREE_VALUE (t_check));
+
+      /* Search the attribute string in place.  strstr () does not
+	 modify it, so no copy into a fixed-size buffer is needed and
+	 a long attribute string cannot overflow anything.  */
+      if (strstr (TREE_STRING_POINTER (string_arg), "critical"))
+	return true;
+    }
+  /* ------------------------------------------------------------- */
+
+  /* Other cases, this isr function is not critical type.  */
+  return false;
+}
+
+/* ------------------------------------------------------------- */
diff --git a/gcc/config/nds32/nds32-linux.opt b/gcc/config/nds32/nds32-linux.opt
new file mode 100644
index 00000000000..75ccd7625a2
--- /dev/null
+++ b/gcc/config/nds32/nds32-linux.opt
@@ -0,0 +1,16 @@
+mcmodel=
+Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE)
+Specify the address generation strategy for code model.
+
+Enum
+Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
+Known cmodel types (for use with the -mcmodel= option):
+
+EnumValue
+Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
+
+EnumValue
+Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
+
+EnumValue
+Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c
index 720e85a20eb..f157dce3366 100644
--- a/gcc/config/nds32/nds32-md-auxiliary.c
+++ b/gcc/config/nds32/nds32-md-auxiliary.c
@@ -39,6 +39,9 @@
 #include "expr.h"
 #include "emit-rtl.h"
 #include "explow.h"
+#include "stringpool.h"
+#include "attribs.h"
+
 
 /* ------------------------------------------------------------------------ */
 
@@ -261,6 +264,118 @@ output_cond_branch_compare_zero (int code, const char *suffix,
   output_asm_insn (pattern, operands);
 }
 
+static void
+nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p)
+{
+  rtx src_high_part;
+  rtx dst_high_part, dst_low_part;
+  /* DImode right shift (logical if LOGIC_SHIFT_P) done on SImode halves.  */
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  src_high_part = nds32_di_high_part_subreg (src);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+  /* A constant shift amount lets us pick the <32 / >=32 form right here.  */
+  if (CONST_INT_P (shiftamount))
+    {
+      if (INTVAL (shiftamount) < 32)
+	{
+	  if (logic_shift_p)
+	    {
+	      emit_insn (gen_uwext (dst_low_part, src,
+						  shiftamount));
+	      emit_insn (gen_lshrsi3 (dst_high_part, src_high_part,
+						     shiftamount));
+	    }
+	  else
+	    {
+	      emit_insn (gen_wext (dst_low_part, src,
+						 shiftamount));
+	      emit_insn (gen_ashrsi3 (dst_high_part, src_high_part,
+						     shiftamount));
+	    }
+	}
+      else
+	{
+	  rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode);
+
+	  if (logic_shift_p)
+	    {
+	      emit_insn (gen_lshrsi3 (dst_low_part, src_high_part,
+						    new_shift_amout));
+	      emit_move_insn (dst_high_part, const0_rtx);
+	    }
+	  else
+	    {
+	      emit_insn (gen_ashrsi3 (dst_low_part, src_high_part,
+						    new_shift_amout));
+	      emit_insn (gen_ashrsi3 (dst_high_part, src_high_part,
+						     GEN_INT (31)));
+	    }
+	}
+    }
+  else
+    {
+      rtx dst_low_part_l32, dst_high_part_l32;
+      rtx dst_low_part_g32, dst_high_part_g32;
+      rtx new_shift_amout, select_reg;
+      dst_low_part_l32 = gen_reg_rtx (SImode);
+      dst_high_part_l32 = gen_reg_rtx (SImode);
+      dst_low_part_g32 = gen_reg_rtx (SImode);
+      dst_high_part_g32 = gen_reg_rtx (SImode);
+      new_shift_amout = gen_reg_rtx (SImode);
+      select_reg = gen_reg_rtx (SImode);
+      /* Reduce the count modulo 64 (SHIFTAMOUNT is overwritten in place).  */
+      emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f)));
+
+      if (logic_shift_p)
+	{
+	  /* Both variants are computed; one is selected afterwards:
+	     if (shiftamount < 32)
+	       dst_low_part = uwext (src, shiftamount)
+	       dst_high_part = src_high_part >> shiftamount
+	     else
+	       dst_low_part = src_high_part >> (shiftamount & 0x1f)
+	       dst_high_part = 0
+	  */
+	  emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount));
+	  emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part,
+						     shiftamount));
+
+	  emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
+	  emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part,
+						    new_shift_amout));
+	  emit_move_insn (dst_high_part_g32, const0_rtx);
+	}
+      else
+	{
+	  /* Both variants are computed; one is selected afterwards:
+	     if (shiftamount < 32)
+	       dst_low_part = wext (src, shiftamount)
+	       dst_high_part = src_high_part >> shiftamount
+	     else
+	       dst_low_part = src_high_part >> (shiftamount & 0x1f)
+	       # shift 31 for sign extend
+	       dst_high_part = src_high_part >> 31
+	  */
+	  emit_insn (gen_wext (dst_low_part_l32, src, shiftamount));
+	  emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part,
+						     shiftamount));
+
+	  emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
+	  emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part,
+						    new_shift_amout));
+	  emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part,
+						     GEN_INT (31)));
+	}
+      /* Use the _l32 results when the count is < 32, else the _g32 ones.  */
+      emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
+
+      emit_insn (gen_cmovnsi (dst_low_part, select_reg,
+			      dst_low_part_l32, dst_low_part_g32));
+      emit_insn (gen_cmovnsi (dst_high_part, select_reg,
+			      dst_high_part_l32, dst_high_part_g32));
+  }
+}
+
 /* ------------------------------------------------------------------------ */
 
 /* Auxiliary function for expand RTL pattern.  */
@@ -1195,8 +1310,166 @@ nds32_emit_v3pop_fpr_callee_saved (int base)
     }
 }
 
+enum nds32_expand_result_type
+nds32_expand_extv (rtx *operands)
+{
+  /* The extraction width and bit position must both be CONST_INTs.  */
+  gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3]));
+  rtx dest = operands[0];
+  rtx from = operands[1];
+  HOST_WIDE_INT nbits = INTVAL (operands[2]);
+  HOST_WIDE_INT start = INTVAL (operands[3]);
+
+  /* Only a byte-aligned, 32-bit wide extraction from memory into a
+     32-bit destination is expanded here; everything else fails.  */
+  if (!MEM_P (from)
+      || nbits != 32
+      || (start % BITS_PER_UNIT) != 0
+      || GET_MODE_BITSIZE (GET_MODE (dest)) != nbits)
+    return EXPAND_FAIL;
+
+  /* Re-address the memory at the byte offset, put that address in a
+     register and emit the unaligned word load.  */
+  rtx word = adjust_address (from, GET_MODE (dest), start / BITS_PER_UNIT);
+  rtx addr_reg = force_reg (Pmode, XEXP (word, 0));
+  emit_insn (gen_unaligned_loadsi (dest, addr_reg));
+  return EXPAND_DONE;
+}
+
+enum nds32_expand_result_type
+nds32_expand_insv (rtx *operands)
+{
+  /* The insertion width and bit position must both be CONST_INTs.  */
+  gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2]));
+  rtx dest = operands[0];
+  rtx value = operands[3];
+  HOST_WIDE_INT nbits = INTVAL (operands[1]);
+  HOST_WIDE_INT start = INTVAL (operands[2]);
+
+  /* Only a byte-aligned, 32-bit wide insertion of a 32-bit value into
+     memory is expanded here; everything else fails.  */
+  if (!MEM_P (dest)
+      || nbits != 32
+      || (start % BITS_PER_UNIT) != 0
+      || GET_MODE_BITSIZE (GET_MODE (value)) != nbits)
+    return EXPAND_FAIL;
+
+  /* Re-address the memory at the byte offset, put that address in a
+     register and emit the unaligned word store.  */
+  rtx word = adjust_address (dest, GET_MODE (value), start / BITS_PER_UNIT);
+  rtx addr_reg = force_reg (Pmode, XEXP (word, 0));
+  emit_insn (gen_unaligned_storesi (addr_reg, value));
+  return EXPAND_DONE;
+}
+
 /* ------------------------------------------------------------------------ */
 
+/* Function to generate PC relative jump table.
+   Refer to nds32.md for more details.
+
+   The following is the sample for the case that diff value
+   can be presented in '.short' size.
+
+     addi    $r1, $r1, -(case_lower_bound)
+     slti    $ta, $r1, (case_number)
+     beqz    $ta, .L_skip_label
+
+     la      $ta, .L35             ! get jump table address
+     lh      $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry
+     add     $ta, $r1, $ta
+     jr5     $ta
+
+     ! jump table entry
+   L35:
+     .short  .L25-.L35
+     .short  .L26-.L35
+     .short  .L27-.L35
+     .short  .L28-.L35
+     .short  .L29-.L35
+     .short  .L30-.L35
+     .short  .L31-.L35
+     .short  .L32-.L35
+     .short  .L33-.L35
+     .short  .L34-.L35
+
+   When generating PIC, the 'la' above is replaced by a sethi/ori pair on
+   the label's @GOTOFF value followed by an add of $gp.  */
+const char *
+nds32_output_casesi_pc_relative (rtx *operands)
+{
+  const char *load_template;
+  rtx table;
+
+  /* The insn following operands[1] must carry the difference vector.  */
+  table = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1])));
+  gcc_assert (GET_CODE (table) == ADDR_DIFF_VEC);
+
+  /* Step C: "t <-- operands[1]".  */
+  if (!flag_pic)
+    output_asm_insn ("la\t$ta, %l1", operands);
+  else
+    {
+      /* For PIC the table address is formed as GOTOFF value + $gp.  */
+      output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands);
+      output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands);
+      output_asm_insn ("add\t$ta, $ta, $gp", operands);
+    }
+
+  /* Step D: "z <-- (mem (plus (operands[0] << m) t))",
+     where m is 0, 1, or 2 to load address-diff value from table.
+     The mode of the difference vector selects the load and the shift.  */
+  switch (GET_MODE (table))
+    {
+    case E_QImode:
+      load_template = "lb\t%2, [$ta + %0 << 0]";
+      break;
+    case E_HImode:
+      load_template = "lh\t%2, [$ta + %0 << 1]";
+      break;
+    case E_SImode:
+      load_template = "lw\t%2, [$ta + %0 << 2]";
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  output_asm_insn (load_template, operands);
+
+  /* Step E: "t <-- z + t".
+     Add table label_ref with address-diff value to
+     obtain target case address.  */
+  output_asm_insn ("add\t$ta, %2, $ta", operands);
+
+  /* Step F: jump to target with register t.  */
+  return TARGET_16_BIT ? "jr5\t$ta" : "jr\t$ta";
+}
+
+/* Function to generate normal jump table.  */
+const char *
+nds32_output_casesi (rtx *operands)
+{
+  /* Step C: "t <-- operands[1]".
+     Non-PIC code loads the table address directly; PIC code builds it
+     from the label's @GOTOFF value plus $gp.  */
+  if (!flag_pic)
+    output_asm_insn ("la\t$ta, %l1", operands);
+  else
+    {
+      output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands);
+      output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands);
+      output_asm_insn ("add\t$ta, $ta, $gp", operands);
+    }
+
+  /* Step D: "z <-- (mem (plus (operands[0] << 2) t))".  */
+  output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
+
+  /* No need to perform Step E, which is only used for
+     pc relative jump table.  */
+
+  /* Step F: jump to target with register z; the jr5 form is used
+     when TARGET_16_BIT is set.  */
+  return TARGET_16_BIT ? "jr5\t%2" : "jr\t%2";
+}
+
 /* Function to return memory format.  */
 enum nds32_16bit_address_type
 nds32_mem_format (rtx op)
@@ -1757,11 +2030,8 @@ nds32_output_stack_push (rtx par_rtx)
 
   /* If we step here, we are going to do v3push or multiple push operation.  */
 
-  /* The v3push/v3pop instruction should only be applied on
-     none-isr and none-variadic function.  */
-  if (TARGET_V3PUSH
-      && !nds32_isr_function_p (current_function_decl)
-      && (cfun->machine->va_args_size == 0))
+  /* Refer to nds32.h, where we comment when push25/pop25 are available.  */
+  if (NDS32_V3PUSH_AVAILABLE_P)
     {
       /* For stack v3push:
 	   operands[0]: Re
@@ -1881,11 +2151,8 @@ nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED)
 
   /* If we step here, we are going to do v3pop or multiple pop operation.  */
 
-  /* The v3push/v3pop instruction should only be applied on
-     none-isr and none-variadic function.  */
-  if (TARGET_V3PUSH
-      && !nds32_isr_function_p (current_function_decl)
-      && (cfun->machine->va_args_size == 0))
+  /* Refer to nds32.h, where we comment when push25/pop25 are available.  */
+  if (NDS32_V3PUSH_AVAILABLE_P)
     {
       /* For stack v3pop:
 	   operands[0]: Re
@@ -2022,77 +2289,6 @@ nds32_output_return (void)
   return "";
 }
 
-/* Function to generate PC relative jump table.
-   Refer to nds32.md for more details.
-
-   The following is the sample for the case that diff value
-   can be presented in '.short' size.
-
-     addi    $r1, $r1, -(case_lower_bound)
-     slti    $ta, $r1, (case_number)
-     beqz    $ta, .L_skip_label
-
-     la      $ta, .L35             ! get jump table address
-     lh      $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry
-     addi    $ta, $r1, $ta
-     jr5     $ta
-
-     ! jump table entry
-   L35:
-     .short  .L25-.L35
-     .short  .L26-.L35
-     .short  .L27-.L35
-     .short  .L28-.L35
-     .short  .L29-.L35
-     .short  .L30-.L35
-     .short  .L31-.L35
-     .short  .L32-.L35
-     .short  .L33-.L35
-     .short  .L34-.L35 */
-const char *
-nds32_output_casesi_pc_relative (rtx *operands)
-{
-  machine_mode mode;
-  rtx diff_vec;
-
-  diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1])));
-
-  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
-
-  /* Step C: "t <-- operands[1]".  */
-  output_asm_insn ("la\t$ta, %l1", operands);
-
-  /* Get the mode of each element in the difference vector.  */
-  mode = GET_MODE (diff_vec);
-
-  /* Step D: "z <-- (mem (plus (operands[0] << m) t))",
-     where m is 0, 1, or 2 to load address-diff value from table.  */
-  switch (mode)
-    {
-    case E_QImode:
-      output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands);
-      break;
-    case E_HImode:
-      output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands);
-      break;
-    case E_SImode:
-      output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
-      break;
-    default:
-      gcc_unreachable ();
-    }
-
-  /* Step E: "t <-- z + t".
-     Add table label_ref with address-diff value to
-     obtain target case address.  */
-  output_asm_insn ("add\t$ta, %2, $ta", operands);
-
-  /* Step F: jump to target with register t.  */
-  if (TARGET_16_BIT)
-    return "jr5\t$ta";
-  else
-    return "jr\t$ta";
-}
 
 /* output a float load instruction */
 const char *
@@ -2250,52 +2446,51 @@ nds32_output_float_store (rtx *operands)
   return "";
 }
 
-/* Function to generate normal jump table.  */
 const char *
-nds32_output_casesi (rtx *operands)
+nds32_output_smw_single_word (rtx *operands)
 {
-  /* Step C: "t <-- operands[1]".  */
-  output_asm_insn ("la\t$ta, %l1", operands);
-
-  /* Step D: "z <-- (mem (plus (operands[0] << 2) t))".  */
-  output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands);
-
-  /* No need to perform Step E, which is only used for
-     pc relative jump table.  */
+  char buff[100];
+  unsigned regno;
+  int enable4;
+  bool update_base_p;
+  rtx base_addr = operands[0];
+  rtx base_reg;
+  rtx otherops[2];
 
-  /* Step F: jump to target with register z.  */
-  if (TARGET_16_BIT)
-    return "jr5\t%2";
+  if (REG_P (XEXP (base_addr, 0)))
+    {
+      update_base_p = false;
+      base_reg = XEXP (base_addr, 0);
+    }
   else
-    return "jr\t%2";
-}
+    {
+      update_base_p = true;
+      base_reg = XEXP (XEXP (base_addr, 0), 0);
+    }
 
-/* Auxiliary functions for lwm/smw.  */
-bool
-nds32_valid_smw_lwm_base_p (rtx op)
-{
-  rtx base_addr;
+  const char *update_base = update_base_p ? "m" : "";
 
-  if (!MEM_P (op))
-    return false;
+  regno = REGNO (operands[1]);
 
-  base_addr = XEXP (op, 0);
+  otherops[0] = base_reg;
+  otherops[1] = operands[1];
 
-  if (REG_P (base_addr))
-    return true;
+  if (regno >= 28)
+    {
+      enable4 = nds32_regno_to_enable4 (regno);
+      sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4);
+    }
   else
     {
-      if (GET_CODE (base_addr) == POST_INC
-	  && REG_P (XEXP (base_addr, 0)))
-        return true;
+      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base);
     }
-
-  return false;
+  output_asm_insn (buff, otherops);
+  return "";
 }
 
 /* ------------------------------------------------------------------------ */
 const char *
-nds32_output_smw_single_word (rtx *operands)
+nds32_output_smw_double_word (rtx *operands)
 {
   char buff[100];
   unsigned regno;
@@ -2303,7 +2498,7 @@ nds32_output_smw_single_word (rtx *operands)
   bool update_base_p;
   rtx base_addr = operands[0];
   rtx base_reg;
-  rtx otherops[2];
+  rtx otherops[3];
 
   if (REG_P (XEXP (base_addr, 0)))
     {
@@ -2322,15 +2517,22 @@ nds32_output_smw_single_word (rtx *operands)
 
   otherops[0] = base_reg;
   otherops[1] = operands[1];
+  otherops[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);;
 
   if (regno >= 28)
     {
-      enable4 = nds32_regno_to_enable4 (regno);
+      enable4 = nds32_regno_to_enable4 (regno)
+		| nds32_regno_to_enable4 (regno + 1);
       sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4);
     }
+  else if (regno == 27)
+    {
+      enable4 = nds32_regno_to_enable4 (regno + 1);
+      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1, %x", update_base, enable4);
+    }
   else
     {
-      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base);
+      sprintf (buff, "smw.bi%s\t%%1, [%%0], %%2", update_base);
     }
   output_asm_insn (buff, otherops);
   return "";
@@ -2415,16 +2617,17 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode)
   if (mode == DImode)
     {
       /* Load doubleword, we need two registers to access.  */
-      reg[0] = simplify_gen_subreg (SImode, operands[0],
-				    GET_MODE (operands[0]), 0);
-      reg[1] = simplify_gen_subreg (SImode, operands[0],
-				    GET_MODE (operands[0]), 4);
+      reg[0] = nds32_di_low_part_subreg (operands[0]);
+      reg[1] = nds32_di_high_part_subreg (operands[0]);
       /* A register only store 4 byte.  */
       width = GET_MODE_SIZE (SImode) - 1;
     }
   else
     {
-      reg[0] = operands[0];
+      if (VECTOR_MODE_P (mode))
+	reg[0] = gen_reg_rtx (SImode);
+      else
+	reg[0] = operands[0];
     }
 
   for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--)
@@ -2466,6 +2669,8 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode)
 	  offset = offset + offset_adj;
 	}
     }
+    if (VECTOR_MODE_P (mode))
+      convert_move (operands[0], reg[0], false);
 }
 
 void
@@ -2499,16 +2704,20 @@ nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode)
   if (mode == DImode)
     {
       /* Load doubleword, we need two registers to access.  */
-      reg[0] = simplify_gen_subreg (SImode, operands[1],
-				    GET_MODE (operands[1]), 0);
-      reg[1] = simplify_gen_subreg (SImode, operands[1],
-				    GET_MODE (operands[1]), 4);
+      reg[0] = nds32_di_low_part_subreg (operands[1]);
+      reg[1] = nds32_di_high_part_subreg (operands[1]);
       /* A register only store 4 byte.  */
       width = GET_MODE_SIZE (SImode) - 1;
     }
   else
     {
-      reg[0] = operands[1];
+      if (VECTOR_MODE_P (mode))
+	{
+	  reg[0] = gen_reg_rtx (SImode);
+	  convert_move (reg[0], operands[1], false);
+	}
+      else
+	reg[0] = operands[1];
     }
 
   for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--)
@@ -2765,6 +2974,36 @@ nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands)
   return "";
 }
 
+const char *
+nds32_output_unpkd8 (rtx output, rtx input,
+		     rtx high_idx_rtx, rtx low_idx_rtx,
+		     bool signed_p)
+{
+  const HOST_WIDE_INT hi = INTVAL (high_idx_rtx);
+  const HOST_WIDE_INT lo = INTVAL (low_idx_rtx);
+
+  gcc_assert (hi >= 0 && hi <= 3);
+  gcc_assert (lo >= 0 && lo <= 3);
+
+  /* We only have 10, 20, 30 and 31; "#" is returned for any other
+     high/low index pair.  */
+  const bool low_zero_p = (lo == 0 && hi != 0);
+  const bool pair_31_p = (lo == 1 && hi == 3);
+  if (!low_zero_p && !pair_31_p)
+    return "#";
+
+  /* Build "<s|z>unpkd8<high><low>" (s when SIGNED_P, z otherwise) and
+     emit it with OUTPUT as operand 0 and INPUT as operand 1.  */
+  char pattern[100];
+  sprintf (pattern,
+	   "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1",
+	   signed_p ? 's' : 'z', hi, lo);
+
+  rtx output_operands[2] = { output, input };
+  output_asm_insn (pattern, output_operands);
+  return "";
+}
+
 /* Return true if SYMBOL_REF X binds locally.  */
 
 static bool
@@ -2782,22 +3021,15 @@ nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call,
   char pattern[100];
   bool noreturn_p;
 
-  if (GET_CODE (symbol) == CONST)
-    {
-      symbol= XEXP (symbol, 0);
-
-      if (GET_CODE (symbol) == PLUS)
-        symbol = XEXP (symbol, 0);
-    }
-
-  gcc_assert (GET_CODE (symbol) == SYMBOL_REF
-	      || REG_P (symbol));
-
   if (nds32_long_call_p (symbol))
     strcpy (pattern, long_call);
   else
     strcpy (pattern, call);
 
+  if (flag_pic && CONSTANT_P (symbol)
+      && !nds32_symbol_binds_local_p (symbol))
+    strcat (pattern, "@PLT");
+
   if (align_p)
     strcat (pattern, "\n\t.align 2");
 
@@ -2815,6 +3047,91 @@ nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call,
   return "";
 }
 
+bool
+nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0,
+			rtx in0_idx1, rtx in1_idx1)
+{
+  /* smds or smdrs: each product takes the same element index from both
+     inputs, and the two products use different indices.  */
+  bool paired_p = (INTVAL (in0_idx0) == INTVAL (in1_idx0)
+		   && INTVAL (in0_idx1) == INTVAL (in1_idx1)
+		   && INTVAL (in0_idx0) != INTVAL (in0_idx1));
+
+  /* smxds: each input uses different indices in the two products.  */
+  bool crossed_p = (INTVAL (in0_idx0) != INTVAL (in0_idx1)
+		    && INTVAL (in1_idx0) != INTVAL (in1_idx1));
+
+  /* Any other index combination needs to be split.  */
+  return !(paired_p || crossed_p);
+}
+
+const char *
+nds32_output_sms (rtx in0_idx0, rtx in1_idx0,
+		  rtx in0_idx1, rtx in1_idx1)
+{
+  /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1]
+
+     The four element indices decide what is emitted:
+       - both products pair equal indices (and the pairs differ):  smds/smdrs;
+       - each input uses two different indices:  smxds;
+       - anything else:  "#", as decided by nds32_need_split_sms_p.  */
+
+  /* Combinations that need splitting are output as "#".  */
+  if (nds32_need_split_sms_p (in0_idx0, in1_idx0,
+			      in0_idx1, in1_idx1))
+    return "#";
+
+  const bool big_endian_p = TARGET_BIG_ENDIAN ? true : false;
+
+  /* smds or smdrs.  */
+  if (INTVAL (in0_idx0) == INTVAL (in1_idx0)
+      && INTVAL (in0_idx1) == INTVAL (in1_idx1)
+      && INTVAL (in0_idx0) != INTVAL (in0_idx1))
+    {
+      /* A first index of 0 gives smds on big-endian and smdrs on
+	 little-endian; any other first index gives the opposite.  */
+      const bool first_zero_p = (INTVAL (in0_idx0) == 0);
+
+      if (first_zero_p == big_endian_p)
+	return "smds\t%0, %1, %2";
+      else
+	return "smdrs\t%0, %1, %2";
+    }
+
+  /* smxds.  */
+  if (INTVAL (in0_idx0) != INTVAL (in0_idx1)
+      && INTVAL (in1_idx0) != INTVAL (in1_idx1))
+    {
+      /* A first index of 1 swaps the two source operands on big-endian
+	 only; any other first index swaps them on little-endian only.  */
+      const bool first_one_p = (INTVAL (in0_idx0) == 1);
+
+      if (first_one_p == big_endian_p)
+	return "smxds\t%0, %2, %1";
+      else
+	return "smxds\t%0, %1, %2";
+    }
+
+  /* Every remaining index combination was already answered with "#"
+     above, so control cannot get here.  */
+  gcc_unreachable ();
+  return "";
+}
+
+void
+nds32_split_sms (rtx out, rtx in0, rtx in1,
+		 rtx in0_idx0, rtx in1_idx0,
+		 rtx in0_idx1, rtx in1_idx1)
+{
+  rtx minuend = gen_reg_rtx (SImode);
+  rtx subtrahend = gen_reg_rtx (SImode);
+  /* Two mulhisi3v insns (one per index pair) into fresh SImode
+     registers, then OUT = first result - second result.  */
+  emit_insn (gen_mulhisi3v (minuend, in0, in1, in0_idx0, in1_idx0));
+  emit_insn (gen_mulhisi3v (subtrahend, in0, in1, in0_idx1, in1_idx1));
+  emit_insn (gen_subsi3 (out, minuend, subtrahend));
+}
+
 /* Spilt a doubleword instrucion to two single word instructions.  */
 void
 nds32_spilt_doubleword (rtx *operands, bool load_p)
@@ -2846,16 +3163,30 @@ nds32_spilt_doubleword (rtx *operands, bool load_p)
       /* generate low_part and high_part memory format:
 	   low_part:  (post_modify ((reg) (plus (reg) (const 4)))
 	   high_part: (post_modify ((reg) (plus (reg) (const -12))) */
-      low_part[mem] = gen_frame_mem (SImode,
-				     gen_rtx_POST_MODIFY (Pmode, sub_mem,
-							  gen_rtx_PLUS (Pmode,
-							  sub_mem,
-							  GEN_INT (4))));
-      high_part[mem] = gen_frame_mem (SImode,
-				      gen_rtx_POST_MODIFY (Pmode, sub_mem,
-							   gen_rtx_PLUS (Pmode,
-							   sub_mem,
-							   GEN_INT (-12))));
+      low_part[mem] = gen_rtx_MEM (SImode,
+				   gen_rtx_POST_MODIFY (Pmode, sub_mem,
+							gen_rtx_PLUS (Pmode,
+							sub_mem,
+							GEN_INT (4))));
+      high_part[mem] = gen_rtx_MEM (SImode,
+				    gen_rtx_POST_MODIFY (Pmode, sub_mem,
+							 gen_rtx_PLUS (Pmode,
+							 sub_mem,
+							 GEN_INT (-12))));
+    }
+  else if (GET_CODE (sub_mem) == POST_INC)
+    {
+      /* memory format is (post_inc (reg)),
+	 so that extract (reg) from the (post_inc (reg)) pattern.  */
+      sub_mem = XEXP (sub_mem, 0);
+
+      /* generate low_part and high_part memory format:
+	   low_part:  (post_inc (reg))
+	   high_part: (post_inc (reg)) */
+      low_part[mem] = gen_rtx_MEM (SImode,
+				   gen_rtx_POST_INC (Pmode, sub_mem));
+      high_part[mem] = gen_rtx_MEM (SImode,
+				    gen_rtx_POST_INC (Pmode, sub_mem));
     }
   else if (GET_CODE (sub_mem) == POST_MODIFY)
     {
@@ -2872,14 +3203,14 @@ nds32_spilt_doubleword (rtx *operands, bool load_p)
       /* Generate low_part and high_part memory format:
 	   low_part:  (post_modify ((reg) (plus (reg) (const)))
 	   high_part: ((plus (reg) (const 4))) */
-      low_part[mem] = gen_frame_mem (SImode,
-				     gen_rtx_POST_MODIFY (Pmode, post_mem,
-							  gen_rtx_PLUS (Pmode,
-							  post_mem,
-							  post_val)));
-      high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode,
-							     post_mem,
-							     4));
+      low_part[mem] = gen_rtx_MEM (SImode,
+				   gen_rtx_POST_MODIFY (Pmode, post_mem,
+							gen_rtx_PLUS (Pmode,
+							post_mem,
+							post_val)));
+      high_part[mem] = gen_rtx_MEM (SImode, plus_constant (Pmode,
+							   post_mem,
+							   4));
     }
   else
     {
@@ -2924,11 +3255,516 @@ nds32_spilt_doubleword (rtx *operands, bool load_p)
     }
 }
 
+void
+nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  rtx src_high_part, src_low_part;
+  rtx dst_high_part, dst_low_part;
+  /* DImode left shift DST = SRC << SHIFTAMOUNT done on SImode halves.  */
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+
+  src_high_part = nds32_di_high_part_subreg (src);
+  src_low_part = nds32_di_low_part_subreg (src);
+
+  /* Shift amounts of 32 or more need separate handling.  */
+  if (CONST_INT_P (shiftamount))
+    {
+      if (INTVAL (shiftamount) < 32)
+	{
+	  rtx ext_start;
+	  ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode);
+	  /* NOTE(review): count 0 gives ext_start 32; confirm unreachable.  */
+	  emit_insn (gen_wext (dst_high_part, src, ext_start));
+	  emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount));
+	}
+      else
+	{
+	  rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode);
+
+	  emit_insn (gen_ashlsi3 (dst_high_part, src_low_part,
+						 new_shift_amout));
+
+	  emit_move_insn (dst_low_part, GEN_INT (0));
+	}
+    }
+  else
+    {
+      rtx dst_low_part_l32, dst_high_part_l32;
+      rtx dst_low_part_g32, dst_high_part_g32;
+      rtx new_shift_amout, select_reg;
+      dst_low_part_l32 = gen_reg_rtx (SImode);
+      dst_high_part_l32 = gen_reg_rtx (SImode);
+      dst_low_part_g32 = gen_reg_rtx (SImode);
+      dst_high_part_g32 = gen_reg_rtx (SImode);
+      new_shift_amout = gen_reg_rtx (SImode);
+      select_reg = gen_reg_rtx (SImode);
+
+      rtx ext_start;
+      ext_start = gen_reg_rtx (SImode);
+
+      /* Both variants are computed; one is selected at the end:
+	 if (shiftamount < 32)
+	   dst_low_part = src_low_part << shiftamount
+	   dst_high_part = wext (src, 32 - shiftamount)
+	   # wext can't handle wext (src, 32) since it only takes rb[0:4]
+	   # for the extraction.
+	   dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
+	 else
+	   dst_low_part = 0
+	   dst_high_part = src_low_part << (shiftamount & 0x1f)
+      */
+      /* NOTE(review): the count is not masked with 0x3f here; confirm.  */
+      emit_insn (gen_subsi3 (ext_start,
+			     gen_int_mode (32, SImode),
+			     shiftamount));
+      emit_insn (gen_wext (dst_high_part_l32, src, ext_start));
+
+      /* Handle shiftamount == 0, which the wext above cannot express.  */
+      emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount,
+			      src_high_part, dst_high_part_l32));
+
+      emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount));
+
+      emit_move_insn (dst_low_part_g32, const0_rtx);
+      emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f)));
+      emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part,
+						 new_shift_amout));
+
+      emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
+
+      emit_insn (gen_cmovnsi (dst_low_part, select_reg,
+			      dst_low_part_l32, dst_low_part_g32));
+      emit_insn (gen_cmovnsi (dst_high_part, select_reg,
+			      dst_high_part_l32, dst_high_part_g32));
+    }
+}
+
+void
+nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  nds32_split_shiftrtdi3 (dst, src, shiftamount, /*logic_shift_p=*/false);
+}
+
+void
+nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  nds32_split_shiftrtdi3 (dst, src, shiftamount, /*logic_shift_p=*/true);
+}
+
+void
+nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount)
+{
+  rtx dst_low_part_l32, dst_high_part_l32;
+  rtx dst_low_part_g32, dst_high_part_g32;
+  rtx select_reg, low5bit, low5bit_inv, minus32sa;
+  rtx dst_low_part_g32_tmph;
+  rtx dst_low_part_g32_tmpl;
+  rtx dst_high_part_l32_tmph;
+  rtx dst_high_part_l32_tmpl;
+
+  rtx src_low_part, src_high_part;
+  rtx dst_high_part, dst_low_part;
+  /* DImode rotate right; the count is masked to 6 bits in a register.  */
+  shiftamount = force_reg (SImode, shiftamount);
+
+  emit_insn (gen_andsi3 (shiftamount,
+			 shiftamount,
+			 gen_int_mode (0x3f, SImode)));
+
+  dst_high_part = nds32_di_high_part_subreg (dst);
+  dst_low_part = nds32_di_low_part_subreg (dst);
+
+  src_high_part = nds32_di_high_part_subreg (src);
+  src_low_part = nds32_di_low_part_subreg (src);
+
+  dst_low_part_l32 = gen_reg_rtx (SImode);
+  dst_high_part_l32 = gen_reg_rtx (SImode);
+  dst_low_part_g32 = gen_reg_rtx (SImode);
+  dst_high_part_g32 = gen_reg_rtx (SImode);
+  low5bit = gen_reg_rtx (SImode);
+  low5bit_inv = gen_reg_rtx (SImode);
+  minus32sa = gen_reg_rtx (SImode);
+  select_reg = gen_reg_rtx (SImode);
+
+  dst_low_part_g32_tmph = gen_reg_rtx (SImode);
+  dst_low_part_g32_tmpl = gen_reg_rtx (SImode);
+
+  dst_high_part_l32_tmph = gen_reg_rtx (SImode);
+  dst_high_part_l32_tmpl = gen_reg_rtx (SImode);
+
+  emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32)));
+
+  /* Both variants are computed; the cmovn below selects one:
+     if shiftamount < 32
+       dst_low_part = wext(src, shiftamount)
+     else
+       dst_low_part = ((src_high_part >> (shiftamount & 0x1f))
+		       | (src_low_part << (32 - (shiftamount & 0x1f))))
+  */
+  emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode)));
+  emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit));
+  /* NOTE(review): for a count of 32, low5bit_inv is 32; confirm the shift.  */
+  emit_insn (gen_wext (dst_low_part_l32, src, shiftamount));
+
+  emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit));
+  emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv));
+
+  emit_insn (gen_iorsi3 (dst_low_part_g32,
+			 dst_low_part_g32_tmpl,
+			 dst_low_part_g32_tmph));
+
+  emit_insn (gen_cmovnsi (dst_low_part, select_reg,
+			  dst_low_part_l32, dst_low_part_g32));
+
+  /* Both variants are computed; the cmovn below selects one:
+     if shiftamount < 32
+       dst_high_part = ((src_high_part >> shiftamount)
+			| (src_low_part << (32 - shiftamount)))
+       dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part
+     else
+       dst_high_part = wext(src, shiftamount & 0x1f)
+  */
+
+  emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount));
+
+  emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount));
+  emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa));
+
+  emit_insn (gen_iorsi3 (dst_high_part_l32,
+			 dst_high_part_l32_tmpl,
+			 dst_high_part_l32_tmph));
+
+  emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount,
+			  src_high_part, dst_high_part_l32));
+
+  emit_insn (gen_wext (dst_high_part_g32, src, low5bit));
+
+  emit_insn (gen_cmovnsi (dst_high_part, select_reg,
+			  dst_high_part_l32, dst_high_part_g32));
+}
+
+/* Return true if OP contains a symbol reference.  */
+bool
+symbolic_reference_mentioned_p (rtx op)
+{
+  const enum rtx_code code = GET_CODE (op);
+
+  /* A symbol or label reference itself is the base case.  */
+  if (code == SYMBOL_REF || code == LABEL_REF)
+    return true;
+
+  /* Otherwise recurse into every rtx ('e') and rtx-vector ('E') operand.  */
+  const char *format = GET_RTX_FORMAT (code);
+  for (int idx = GET_RTX_LENGTH (code) - 1; idx >= 0; idx--)
+    switch (format[idx])
+      {
+      case 'E':
+	for (int elt = XVECLEN (op, idx) - 1; elt >= 0; elt--)
+	  if (symbolic_reference_mentioned_p (XVECEXP (op, idx, elt)))
+	    return true;
+	break;
+      case 'e':
+	if (symbolic_reference_mentioned_p (XEXP (op, idx)))
+	  return true;
+	break;
+      }
+
+  return false;
+}
+
+/* Expand PIC code for @GOTOFF and @GOT and return the rtx to use for X.
+  Labels, constant-pool symbols and local symbols use @GOTOFF, other
+  symbols use @GOT; any other non-CONST rtx is returned unchanged.
+  Example for @GOTOFF:
+    la $r0, symbol@GOTOFF
+      -> sethi $ta, hi20(symbol@GOTOFF)
+	 ori $ta, $ta, lo12(symbol@GOTOFF)
+	 add $r0, $ta, $gp
+
+  Example for @GOT:
+
+    la $r0, symbol@GOT
+      -> sethi $ta, hi20(symbol@GOT)
+	 ori $ta, $ta, lo12(symbol@GOT)
+	 lw  $r0, [$ta + $gp]
+*/
+rtx
+nds32_legitimize_pic_address (rtx x)
+{
+  rtx addr = x;
+  rtx reg = gen_reg_rtx (Pmode);
+  rtx pat;
+  /* Label, constant-pool symbol or local symbol: REG + GP via @GOTOFF.  */
+  if (GET_CODE (x) == LABEL_REF
+      || (GET_CODE (x) == SYMBOL_REF
+	  && (CONSTANT_POOL_ADDRESS_P (x)
+	      || SYMBOL_REF_LOCAL_P (x))))
+    {
+      addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF);
+      addr = gen_rtx_CONST (SImode, addr);
+      emit_insn (gen_sethi (reg, addr));
+      emit_insn (gen_lo_sum (reg, reg, addr));
+      x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx);
+    }
+  else if (GET_CODE (x) == SYMBOL_REF)
+    {
+      addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT);
+      addr = gen_rtx_CONST (SImode, addr);
+      emit_insn (gen_sethi (reg, addr));
+      emit_insn (gen_lo_sum (reg, reg, addr));
+
+      x = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+					       reg));
+    }
+  else if (GET_CODE (x) == CONST)
+    {
+      /* We don't split constant in expand_pic_move because GOTOFF can combine
+	 the addend with the symbol.  */
+      addr = XEXP (x, 0);
+      gcc_assert (GET_CODE (addr) == PLUS);
+
+      rtx op0 = XEXP (addr, 0);
+      rtx op1 = XEXP (addr, 1);
+
+      if ((GET_CODE (op0) == LABEL_REF
+	   || (GET_CODE (op0) == SYMBOL_REF
+	       && (CONSTANT_POOL_ADDRESS_P (op0)
+		   || SYMBOL_REF_LOCAL_P (op0))))
+	  && GET_CODE (op1) == CONST_INT)
+	{
+	  pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF);
+	  pat = gen_rtx_PLUS (Pmode, pat, op1);
+	  pat = gen_rtx_CONST (Pmode, pat);
+	  emit_insn (gen_sethi (reg, pat));
+	  emit_insn (gen_lo_sum (reg, reg, pat));
+	  x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx);
+	}
+      else if (GET_CODE (op0) == SYMBOL_REF
+	       && GET_CODE (op1) == CONST_INT)
+	{
+	  /* This is a constant offset from a @GOT symbol reference.  */
+	  addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT);
+	  addr = gen_rtx_CONST (SImode, addr);
+	  emit_insn (gen_sethi (reg, addr));
+	  emit_insn (gen_lo_sum (reg, reg, addr));
+	  addr = gen_const_mem (SImode, gen_rtx_PLUS (Pmode,
+						      pic_offset_table_rtx,
+						      reg));
+	  emit_move_insn (reg, addr);
+	  if (satisfies_constraint_Is15 (op1))
+	    x = gen_rtx_PLUS (Pmode, reg, op1);
+	  else
+	    {
+	      rtx tmp_reg = gen_reg_rtx (SImode);
+	      emit_insn (gen_movsi (tmp_reg, op1));
+	      x = gen_rtx_PLUS (Pmode, reg, tmp_reg);
+	    }
+	}
+      else
+	{
+	  /* Unsupported form: dump the rtx and abort.  */
+	  debug_rtx (x);
+	  gcc_unreachable ();
+	}
+    }
+  return x;
+}
+
+/* Move operands[1] into operands[0] through its legitimized PIC form.  */
+void
+nds32_expand_pic_move (rtx *operands)
+{
+  rtx pic_src = nds32_legitimize_pic_address (operands[1]);
+
+  emit_move_insn (operands[0], pic_src);
+}
+
+/* Legitimize the indirect-call (ICT) symbol X.
+   Emits the sethi half here and returns a constant MEM whose address
+   is the matching LO_SUM, e.g. for @ICT with ICT model=large:
+
+     la $r0, symbol@ICT
+       -> sethi $rt, hi20(symbol@ICT)
+	  lwi $r0, [$rt + lo12(symbol@ICT)]
+   X must be a SYMBOL_REF carrying the indirect_call attribute.  */
+rtx
+nds32_legitimize_ict_address (rtx x)
+{
+  rtx hi_reg = gen_reg_rtx (Pmode);
+  rtx ict_unspec, ict_const;
+
+  gcc_assert (GET_CODE (x) == SYMBOL_REF
+	      && nds32_indirect_call_referenced_p (x));
+
+  /* Wrap X as (const (unspec [X] UNSPEC_ICT)).  */
+  ict_unspec = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_ICT);
+  ict_const = gen_rtx_CONST (SImode, ict_unspec);
+
+  /* High part goes into a fresh pseudo; low part stays in the address.  */
+  emit_insn (gen_sethi (hi_reg, ict_const));
+  return gen_const_mem (SImode, gen_rtx_LO_SUM (Pmode, hi_reg, ict_const));
+}
+
+/* Move the ICT symbol operands[1] into operands[0] after legitimizing
+   it with nds32_legitimize_ict_address.  */
+void
+nds32_expand_ict_move (rtx *operands)
+{
+  rtx ict_mem = nds32_legitimize_ict_address (operands[1]);
+
+  emit_move_insn (operands[0], ict_mem);
+}
+
+/* Return true if X is a SYMBOL_REF, possibly wrapped in an UNSPEC_ICT,
+   whose declaration carries the "indirect_call" attribute.  */
+bool
+nds32_indirect_call_referenced_p (rtx x)
+{
+  /* Look through the ICT wrapper to the symbol it refers to.  */
+  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_ICT)
+    x = XVECEXP (x, 0, 0);
+
+  if (GET_CODE (x) != SYMBOL_REF)
+    return false;
+
+  /* A symbol without a decl has no attribute list to search.  */
+  tree decl = SYMBOL_REF_DECL (x);
+  if (!decl)
+    return false;
+
+  return lookup_attribute ("indirect_call", DECL_ATTRIBUTES (decl)) != NULL;
+}
+
 /* Return true X is need use long call.  */
 bool
 nds32_long_call_p (rtx symbol)
 {
-  return TARGET_CMODEL_LARGE;
+  if (nds32_indirect_call_referenced_p (symbol))
+    return TARGET_ICT_MODEL_LARGE;
+  else
+    return TARGET_CMODEL_LARGE;
+}
+
+/* Return true if X is a thread-local SYMBOL_REF, either directly or as
+   the first operand of a (const (plus ...)).  Always false when the
+   target has no TLS support.  */
+bool
+nds32_tls_referenced_p (rtx x)
+{
+  rtx sym = x;
+
+  if (!targetm.have_tls)
+    return false;
+  /* Strip an outer (const (plus SYM ...)) down to SYM.  */
+  if (GET_CODE (sym) == CONST && GET_CODE (XEXP (sym, 0)) == PLUS)
+    sym = XEXP (XEXP (sym, 0), 0);
+  return GET_CODE (sym) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (sym);
+}
+
+/* X contains a thread-local SYMBOL_REF.  Emit code to compute that
+   thread-local address and return the resulting rtx.  */
+rtx
+nds32_legitimize_tls_address (rtx x)
+{
+  rtx tmp_reg;
+  rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM);
+  rtx pat, insns, reg0;
+
+  if (GET_CODE (x) == SYMBOL_REF)
+    switch (SYMBOL_REF_TLS_MODEL (x))
+      {
+      case TLS_MODEL_GLOBAL_DYNAMIC:
+      case TLS_MODEL_LOCAL_DYNAMIC:
+	/* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill
+	   may destroy the define-use chain analysis to insert relax_hint.  */
+	if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC)
+	  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD);
+	else
+	  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD);
+
+	pat = gen_rtx_CONST (SImode, pat);
+	reg0 = gen_rtx_REG (Pmode, 0);
+	/* If we can confirm all clobber registers, it doesn't have to use call
+	   instruction.  */
+	insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0)));
+	use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx);
+	RTL_CONST_CALL_P (insns) = 1;
+	tmp_reg = gen_reg_rtx (SImode);
+	emit_move_insn (tmp_reg, reg0);
+	x = tmp_reg;
+	break;
+
+      case TLS_MODEL_INITIAL_EXEC:
+	pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE);
+	tmp_reg  = gen_reg_rtx (SImode);
+	pat = gen_rtx_CONST (SImode, pat);
+	emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0)));
+	if (flag_pic)
+	  emit_use (pic_offset_table_rtx);
+	x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg);
+	break;
+
+      case TLS_MODEL_LOCAL_EXEC:
+	/* Expand symbol_ref@TPOFF':
+	     sethi $ta, hi20(symbol_ref@TPOFF)
+	     ori   $ta, $ta, lo12(symbol_ref@TPOFF)
+	     add   $r0, $ta, $tp */
+	tmp_reg  = gen_reg_rtx (SImode);
+	pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE);
+	pat = gen_rtx_CONST (SImode, pat);
+	emit_insn (gen_sethi (tmp_reg, pat));
+	emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat));
+	x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg);
+	break;
+
+      default:
+	gcc_unreachable ();
+      }
+  else if (GET_CODE (x) == CONST)
+    {
+      rtx base, addend;
+      split_const (x, &base, &addend);
+
+      if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
+	{
+	  /* Expand symbol_ref@TPOFF':
+	     sethi $ta, hi20(symbol_ref@TPOFF + addend)
+	     ori   $ta, $ta, lo12(symbol_ref@TPOFF + addend)
+	     add   $r0, $ta, $tp */
+	  tmp_reg  = gen_reg_rtx (SImode);
+	  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, base), UNSPEC_TLSLE);
+	  pat = gen_rtx_PLUS (SImode, pat, addend);
+	  pat = gen_rtx_CONST (SImode, pat);
+	  emit_insn (gen_sethi (tmp_reg, pat));
+	  emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat));
+	  x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg);
+	}
+    }
+
+  return x;
+}
+
+/* Expand a move of the TLS reference operands[1] into operands[0].  */
+void
+nds32_expand_tls_move (rtx *operands)
+{
+  rtx src = operands[1];
+  rtx base, addend;
+
+  /* BASE and ADDEND are always read below, so SRC must be constant.  */
+  gcc_assert (CONSTANT_P (src));
+  split_const (src, &base, &addend);
+
+  if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
+    src = nds32_legitimize_tls_address (src);
+  else
+    {
+      src = nds32_legitimize_tls_address (base);
+      if (addend != const0_rtx)
+	src = force_operand (gen_rtx_PLUS (SImode, src, addend),
+			     operands[0]);
+    }
+
+  emit_move_insn (operands[0], src);
 }
 
 void
@@ -2976,3 +3812,105 @@ nds32_expand_constant (machine_mode mode, HOST_WIDE_INT val,
       emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp));
     }
 }
+
+/* Auxiliary function for lwm/smw.
+   Return true if OP is a MEM whose address is a plain register or a
+   POST_INC of a register.  */
+bool
+nds32_valid_smw_lwm_base_p (rtx op)
+{
+  rtx mem_addr;
+
+  /* Anything that is not a memory reference has no base to check.  */
+  if (!MEM_P (op))
+    return false;
+
+  mem_addr = XEXP (op, 0);
+
+  /* Bare register base.  */
+  if (REG_P (mem_addr))
+    return true;
+
+  /* Otherwise only a post-incremented register base is acceptable.  */
+  return (GET_CODE (mem_addr) == POST_INC
+	  && REG_P (XEXP (mem_addr, 0)));
+}
+
+/* Auxiliary functions for manipulating DImode values.
+   Return the SImode high-part subreg of the DImode rtx REG.  */
+rtx
+nds32_di_high_part_subreg (rtx reg)
+{
+  unsigned hi_byte = subreg_highpart_offset (SImode, DImode);
+
+  return simplify_gen_subreg (SImode, reg, DImode, hi_byte);
+}
+
+/* Return the SImode low-part subreg of the DImode rtx REG.  */
+rtx
+nds32_di_low_part_subreg (rtx reg)
+{
+  unsigned lo_byte = subreg_lowpart_offset (SImode, DImode);
+
+  return simplify_gen_subreg (SImode, reg, DImode, lo_byte);
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Auxiliary function to output the TLS descriptor pattern.  The asm text
+   is emitted here via output_asm_insn, so the returned template is empty.  */
+const char *
+nds32_output_tls_desc (rtx *operands)
+{
+  char pattern[1000];
+  /* Each "%%" becomes '%', so output_asm_insn sees %0 and %1.  */
+  if (TARGET_RELAX_HINT)
+    snprintf (pattern, sizeof (pattern),
+	      ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t"
+	      ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t"
+	      ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t"
+	      ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t"
+	      ".relax_hint %%1\n\tjral $r15");
+  else
+    snprintf (pattern, sizeof (pattern),
+	      "sethi $r0, hi20(%%0)\n\t"
+	      "ori $r0, $r0, lo12(%%0)\n\t"
+	      "lw $r15, [$r0 + $gp]\n\t"
+	      "add $r0, $r0, $gp\n\t"
+	      "jral $r15");
+  output_asm_insn (pattern, operands);
+  return "";
+}
+
+const char *
+nds32_output_tls_ie (rtx *operands)
+{
+  char pattern[1000];
+  /* Emit the initial-exec TLS load; flag_pic selects the $gp-based form.  */
+  if (flag_pic)
+  {
+      if (TARGET_RELAX_HINT)
+	snprintf (pattern, sizeof (pattern),
+		  ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t"
+		  ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t"
+		  ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]");
+      else
+	snprintf (pattern, sizeof (pattern),
+		  "sethi %%0, hi20(%%1)\n\t"
+		  "ori %%0, %%0, lo12(%%1)\n\t"
+		  "lw %%0, [%%0 + $gp]");
+  }
+  else
+    {
+      if (TARGET_RELAX_HINT)
+	snprintf (pattern, sizeof (pattern),
+		  ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t"
+		  ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]");
+      else
+	snprintf (pattern, sizeof (pattern),
+		  "sethi %%0, hi20(%%1)\n\t"
+		  "lwi %%0, [%%0 + lo12(%%1)]");
+    }
+  output_asm_insn (pattern, operands);
+  return "";
+}
diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c
index 8dea13047b6..f6140e65130 100644
--- a/gcc/config/nds32/nds32-memory-manipulation.c
+++ b/gcc/config/nds32/nds32-memory-manipulation.c
@@ -257,8 +257,124 @@ static bool
 nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
 				       rtx size, rtx alignment)
 {
-  return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
-						  size, alignment);
+  /* Expand a constant-SIZE block move as a loop.  Returns false before
+     emitting anything for a V3M move that is not 4-byte aligned.  */
+  rtx dst_base_reg, src_base_reg, dst_itr, src_itr;
+  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
+  rtx dst_end, tmp;
+  rtx double_word_mode_loop, byte_mode_loop;
+  int start_regno;
+  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
+  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);
+
+  if (TARGET_ISA_V3M && !align_to_4_bytes)
+    return false;
+
+  if (TARGET_REDUCED_REGS)
+    start_regno = 2;
+  else
+    start_regno = 16;
+
+  dst_itr = gen_reg_rtx (Pmode);
+  src_itr = gen_reg_rtx (Pmode);
+  tmp = gen_reg_rtx (QImode);
+
+  double_word_mode_loop = gen_label_rtx ();
+  byte_mode_loop = gen_label_rtx ();
+
+  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
+  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
+
+  if (total_bytes < 8)
+    {
+      /* Emit total_bytes less than 8 loop version of movmem.
+	add     $dst_end, $dst, $size
+	move    $dst_itr, $dst
+	move    $src_itr, $src
+	.Lbyte_mode_loop:
+	lbi.bi  $tmp, [$src_itr], #1
+	sbi.bi  $tmp, [$dst_itr], #1
+	! Upper bound not reached.  Loop.
+	bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
+
+      /* add     $dst_end, $dst, $size */
+      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
+			      NULL_RTX, 0, OPTAB_WIDEN);
+      /* move    $dst_itr, $dst
+	 move    $src_itr, $src */
+      emit_move_insn (dst_itr, dst_base_reg);
+      emit_move_insn (src_itr, src_base_reg);
+
+      /* .Lbyte_mode_loop: */
+      emit_label (byte_mode_loop);
+
+      /* lbi.bi  $tmp, [$src_itr], #1 */
+      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
+
+      /* sbi.bi  $tmp, [$dst_itr], #1 */
+      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
+      /* ! Upper bound not reached.  Loop.
+	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
+      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
+			       Pmode, 1, byte_mode_loop);
+      return true;
+    }
+  else if (total_bytes % 8 == 0)
+    {
+      /* Emit multiple of 8 loop version of movmem.
+
+	 add     $dst_end, $dst, $size
+	 move    $dst_itr, $dst
+	 move    $src_itr, $src
+
+	.Ldouble_word_mode_loop:
+	lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
+	smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
+	! move will delete after register allocation
+	move    $src_itr, $src_itr'
+	move    $dst_itr, $dst_itr'
+	! Upper bound not reached.  Loop.
+	bne     $dst_end, $dst_itr, .Ldouble_word_mode_loop */
+
+      /* add     $dst_end, $dst, $size */
+      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
+			      NULL_RTX, 0, OPTAB_WIDEN);
+
+      /* move    $dst_itr, $dst
+	 move    $src_itr, $src */
+      emit_move_insn (dst_itr, dst_base_reg);
+      emit_move_insn (src_itr, src_base_reg);
+
+      /* .Ldouble_word_mode_loop: */
+      emit_label (double_word_mode_loop);
+      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
+	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
+      src_itr_m = src_itr;
+      dst_itr_m = dst_itr;
+      srcmem_m = srcmem;
+      dstmem_m = dstmem;
+      nds32_emit_mem_move_block (start_regno, 2,
+				 &dst_itr_m, &dstmem_m,
+				 &src_itr_m, &srcmem_m,
+				 true);
+      /* move    $src_itr, $src_itr'
+	 move    $dst_itr, $dst_itr' */
+      emit_move_insn (dst_itr, dst_itr_m);
+      emit_move_insn (src_itr, src_itr_m);
+
+      /* ! Upper bound not reached.  Loop.
+	 bne     $dst_end, $dst_itr, .Ldouble_word_mode_loop */
+      emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
+			       Pmode, 1, double_word_mode_loop);
+    }
+  else
+    {
+      /* Handle size greater than 8, and not a multiple of 8.  */
+      return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
+						      size, alignment);
+    }
+
+  return true;
 }
 
 static bool
@@ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
 /* Auxiliary function for expand setmem pattern.  */
 
 static rtx
-nds32_gen_dup_4_byte_to_word_value (rtx value)
+nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
 {
-  rtx value4word = gen_reg_rtx (SImode);
-
   gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
 
   if (CONST_INT_P (value))
@@ -449,36 +563,74 @@ nds32_gen_dup_4_byte_to_word_value (rtx value)
     }
   else
     {
-      /* ! prepare word
-	 andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
-	 slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
-	 or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
-	 slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
-	 or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */
-
-      rtx tmp1, tmp2, tmp3, tmp4, final_value;
-      tmp1 = expand_binop (SImode, and_optab, value,
-			   gen_int_mode (0xff, SImode),
-			   NULL_RTX, 0, OPTAB_WIDEN);
-      tmp2 = expand_binop (SImode, ashl_optab, tmp1,
-			   gen_int_mode (8, SImode),
-			   NULL_RTX, 0, OPTAB_WIDEN);
-      tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
-			   NULL_RTX, 0, OPTAB_WIDEN);
-      tmp4 = expand_binop (SImode, ashl_optab, tmp3,
-			   gen_int_mode (16, SImode),
-			   NULL_RTX, 0, OPTAB_WIDEN);
-
-      final_value = expand_binop (SImode, ior_optab, tmp3, tmp4,
-				  NULL_RTX, 0, OPTAB_WIDEN);
-      emit_move_insn (value4word, final_value);
+      if (NDS32_EXT_DSP_P ())
+	{
+	  /* ! prepare word
+	     insb    $tmp, $value, 1         ! $tmp  <- 0x0000abab
+	     pkbb16  $value4word, $tmp, $tmp ! $value4word  <- 0xabababab */
+	  rtx tmp = gen_reg_rtx (SImode);
+
+	  convert_move (tmp, value, true);
+
+	  emit_insn (
+	    gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));
+
+	  emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
+	}
+      else
+	{
+	  /* ! prepare word
+	     andi    $tmp1, $value, 0xff       ! $tmp1  <- 0x000000ab
+	     slli    $tmp2, $tmp1, 8           ! $tmp2  <- 0x0000ab00
+	     or      $tmp3, $tmp1, $tmp2       ! $tmp3  <- 0x0000abab
+	     slli    $tmp4, $tmp3, 16          ! $tmp4  <- 0xabab0000
+	     or      $val4word, $tmp3, $tmp4   ! $value4word  <- 0xabababab  */
+
+	  rtx tmp1, tmp2, tmp3, tmp4;
+	  tmp1 = expand_binop (SImode, and_optab, value,
+			       gen_int_mode (0xff, SImode),
+			       NULL_RTX, 0, OPTAB_WIDEN);
+	  tmp2 = expand_binop (SImode, ashl_optab, tmp1,
+			       gen_int_mode (8, SImode),
+			       NULL_RTX, 0, OPTAB_WIDEN);
+	  tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
+			       NULL_RTX, 0, OPTAB_WIDEN);
+	  tmp4 = expand_binop (SImode, ashl_optab, tmp3,
+			       gen_int_mode (16, SImode),
+			       NULL_RTX, 0, OPTAB_WIDEN);
+
+	  emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
+	}
     }
 
   return value4word;
 }
 
 static rtx
-emit_setmem_word_loop (rtx itr, rtx size, rtx value)
+nds32_gen_dup_4_byte_to_word_value (rtx value)
+{
+  /* Have the helper fill a fresh SImode pseudo, then return that pseudo.  */
+  rtx word_reg = gen_reg_rtx (SImode);
+  nds32_gen_dup_4_byte_to_word_value_aux (value, word_reg);
+  return word_reg;
+}
+
+/* Return a fresh DImode pseudo: the helper fills its low word from VALUE
+   and the high word is then copied from the low word.  */
+static rtx
+nds32_gen_dup_8_byte_to_double_word_value (rtx value)
+{
+  rtx dreg = gen_reg_rtx (DImode);
+
+  nds32_gen_dup_4_byte_to_word_value_aux (value,
+					  nds32_di_low_part_subreg (dreg));
+  emit_move_insn (nds32_di_high_part_subreg (dreg),
+		  nds32_di_low_part_subreg (dreg));
+  return dreg;
+}
+
+
+static rtx
+emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
 {
   rtx word_mode_label = gen_label_rtx ();
   rtx word_mode_end_label = gen_label_rtx ();
@@ -487,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
   rtx word_mode_end = gen_reg_rtx (SImode);
   rtx size_for_word = gen_reg_rtx (SImode);
 
-  /* and     $size_for_word, $size, #~3  */
+  /* and     $size_for_word, $size, #~0x7  */
   size_for_word = expand_binop (SImode, and_optab, size,
-				gen_int_mode (~3, SImode),
+				gen_int_mode (~0x7, SImode),
 				NULL_RTX, 0, OPTAB_WIDEN);
 
   emit_move_insn (byte_mode_size, size);
@@ -501,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
   word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
 				NULL_RTX, 0, OPTAB_WIDEN);
 
-  /* andi    $byte_mode_size, $size, 3  */
-  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3),
+  /* andi    $byte_mode_size, $size, 0x7  */
+  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
 				     NULL_RTX, 0, OPTAB_WIDEN);
 
   emit_move_insn (byte_mode_size, byte_mode_size_tmp);
@@ -512,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
   /*   ! word-mode set loop
        smw.bim $value4word, [$dst_itr], $value4word, 0
        bne     $word_mode_end, $dst_itr, .Lword_mode  */
-  emit_insn (gen_unaligned_store_update_base_w (itr,
-						itr,
-						value));
+  emit_insn (gen_unaligned_store_update_base_dw (itr,
+						 itr,
+						 value));
   emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
 			   Pmode, 1, word_mode_label);
 
@@ -566,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
 static bool
 nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
 {
-  rtx value4word;
+  rtx value4doubleword;
   rtx value4byte;
   rtx dst;
   rtx byte_mode_size;
@@ -609,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
      or      $tmp3, $tmp1, $tmp2             ! $tmp3  <- 0x0000abab
      slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000
      or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab  */
-  value4word = nds32_gen_dup_4_byte_to_word_value (value);
+  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
 
   /*   and     $size_for_word, $size, #-4
        beqz    $size_for_word, .Lword_mode_end
@@ -622,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
        smw.bim $value4word, [$dst], $value4word, 0
        bne     $word_mode_end, $dst, .Lword_mode
      .Lword_mode_end:  */
-  byte_mode_size = emit_setmem_word_loop (dst, size, value4word);
+  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);
 
   /*   beqz    $byte_mode_size, .Lend
        add     $byte_mode_end, $dst, $byte_mode_size
@@ -633,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
        bne     $byte_mode_end, $dst, .Lbyte_mode
      .Lend: */
 
-  value4byte = simplify_gen_subreg (QImode, value4word, SImode,
-				    subreg_lowpart_offset (QImode, SImode));
+  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
+				    subreg_lowpart_offset (QImode, DImode));
 
   emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);
 
@@ -651,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
   rtx byte_loop_size = gen_reg_rtx (SImode);
   rtx remain_size = gen_reg_rtx (SImode);
   rtx new_base_reg;
-  rtx value4byte, value4word;
+  rtx value4byte, value4doubleword;
   rtx byte_mode_size;
   rtx last_byte_loop_label = gen_label_rtx ();
 
   size = force_reg (SImode, size);
 
-  value4word = nds32_gen_dup_4_byte_to_word_value (value);
-  value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0);
+  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
+  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
+				    subreg_lowpart_offset (QImode, DImode));
 
   emit_move_insn (byte_loop_size, size);
   emit_move_insn (byte_loop_base, base_reg);
@@ -686,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
   emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));
 
   /* Set memory word by word. */
-  byte_mode_size = emit_setmem_word_loop (new_base_reg,
-					  remain_size,
-					  value4word);
+  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
+						remain_size,
+						value4doubleword);
 
   emit_move_insn (byte_loop_base, new_base_reg);
   emit_move_insn (byte_loop_size, byte_mode_size);
diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md
index a8f77175927..80746b19323 100644
--- a/gcc/config/nds32/nds32-multiple.md
+++ b/gcc/config/nds32/nds32-multiple.md
@@ -2854,6 +2854,25 @@
    (set_attr "length"              "4")]
 )
 
+(define_expand "unaligned_store_update_base_dw"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
+		   (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8)))
+	      (set (mem:DI (match_dup 1))
+		   (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])]
+  ""
+{
+  /* DO NOT emit unaligned_store_w_m immediately, since the web pass doesn't
+     recognize post_inc; try it again after GCC 5.0.
+     REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156  */
+  emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2]));
+  emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode)));
+  DONE;
+}
+  [(set_attr "type"   "store_multiple")
+   (set_attr "combo"               "2")
+   (set_attr "length"              "4")]
+)
+
 (define_insn "*stmsi25"
   [(match_parallel 0 "nds32_store_multiple_operation"
     [(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
diff --git a/gcc/config/nds32/nds32-n10.md b/gcc/config/nds32/nds32-n10.md
new file mode 100644
index 00000000000..0dd76da1ef8
--- /dev/null
+++ b/gcc/config/nds32/nds32-n10.md
@@ -0,0 +1,439 @@
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ------------------------------------------------------------------------
+;; Define N10 pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_n10_machine")
+
+;; ------------------------------------------------------------------------
+;; Pipeline Stages
+;; ------------------------------------------------------------------------
+;; IF - Instruction Fetch
+;; II - Instruction Issue / Instruction Decode
+;; EX - Instruction Execution
+;; MM - Memory Execution
+;; WB - Instruction Retire / Result Write-Back
+
+(define_cpu_unit "n10_ii" "nds32_n10_machine")
+(define_cpu_unit "n10_ex" "nds32_n10_machine")
+(define_cpu_unit "n10_mm" "nds32_n10_machine")
+(define_cpu_unit "n10_wb" "nds32_n10_machine")
+(define_cpu_unit "n10f_iq" "nds32_n10_machine")
+(define_cpu_unit "n10f_rf" "nds32_n10_machine")
+(define_cpu_unit "n10f_e1" "nds32_n10_machine")
+(define_cpu_unit "n10f_e2" "nds32_n10_machine")
+(define_cpu_unit "n10f_e3" "nds32_n10_machine")
+(define_cpu_unit "n10f_e4" "nds32_n10_machine")
+
+(define_insn_reservation "nds_n10_unknown" 1
+  (and (eq_attr "type" "unknown")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_misc" 1
+  (and (eq_attr "type" "misc")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_mmu" 1
+  (and (eq_attr "type" "mmu")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_alu" 1
+  (and (eq_attr "type" "alu")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_alu_shift" 1
+  (and (eq_attr "type" "alu_shift")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_pbsad" 1
+  (and (eq_attr "type" "pbsad")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex*3, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_pbsada" 1
+  (and (eq_attr "type" "pbsada")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex*3, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_load" 1
+  (and (match_test "nds32::load_single_p (insn)")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_store" 1
+  (and (match_test "nds32::store_single_p (insn)")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_1" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "1")))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_2" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (ior (and (eq_attr "type" "load_multiple")
+		 (eq_attr "combo" "2"))
+	    (match_test "nds32::load_double_p (insn)")))
+  "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_3" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "3")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_4" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "4")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_5" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "5")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_6" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "6")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_7" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "7")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_N" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "load_multiple")
+	    (match_test "get_attr_combo (insn) >= 8")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_1" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "1")))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_2" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (ior (and (eq_attr "type" "store_multiple")
+		 (eq_attr "combo" "2"))
+	    (match_test "nds32::store_double_p (insn)")))
+  "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_3" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "3")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_4" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "4")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_5" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "5")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_6" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "6")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_7" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "7")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_N" 1
+  (and (eq_attr "pipeline_model" "n10")
+       (and (eq_attr "type" "store_multiple")
+	    (match_test "get_attr_combo (insn) >= 8")))
+  "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_mul" 1
+  (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_mac" 1
+  (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_div" 1
+  (and (eq_attr "type" "div")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex*34, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_branch" 1
+  (and (eq_attr "type" "branch")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_alu" 1
+  (and (eq_attr "type" "dalu")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_alu64" 1
+  (and (eq_attr "type" "dalu64")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_alu_round" 1
+  (and (eq_attr "type" "daluround")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_cmp" 1
+  (and (eq_attr "type" "dcmp")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_clip" 1
+  (and (eq_attr "type" "dclip")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_mul" 1
+  (and (eq_attr "type" "dmul")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_mac" 1
+  (and (eq_attr "type" "dmac")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_insb" 1
+  (and (eq_attr "type" "dinsb")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_pack" 1
+  (and (eq_attr "type" "dpack")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_bpick" 1
+  (and (eq_attr "type" "dbpick")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_wext" 1
+  (and (eq_attr "type" "dwext")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_fpu_alu" 4
+  (and (eq_attr "type" "falu")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_muls" 4
+  (and (eq_attr "type" "fmuls")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_muld" 4
+  (and (eq_attr "type" "fmuld")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_macs" 4
+  (and (eq_attr "type" "fmacs")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_macd" 4
+  (and (eq_attr "type" "fmacd")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*4, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_divs" 4
+  (and (ior (eq_attr "type" "fdivs")
+	    (eq_attr "type" "fsqrts"))
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_divd" 4
+  (and (ior (eq_attr "type" "fdivd")
+	    (eq_attr "type" "fsqrtd"))
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fast_alu" 2
+  (and (ior (eq_attr "type" "fcmp")
+	    (ior (eq_attr "type" "fabs")
+		 (ior (eq_attr "type" "fcpy")
+		      (eq_attr "type" "fcmov"))))
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmtsr" 4
+  (and (eq_attr "type" "fmtsr")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmtdr" 4
+  (and (eq_attr "type" "fmtdr")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmfsr" 2
+  (and (eq_attr "type" "fmfsr")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmfdr" 2
+  (and (eq_attr "type" "fmfdr")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_load" 3
+  (and (eq_attr "type" "fload")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_store" 1
+  (and (eq_attr "type" "fstore")
+       (eq_attr "pipeline_model" "n10"))
+  "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+;; ------------------------------------------------------------------------
+;; Comment Notations and Bypass Rules
+;; ------------------------------------------------------------------------
+;; Producers (LHS)
+;;   LD
+;;     Load data from the memory and produce the loaded data. The result is
+;;     ready at MM.
+;;   LMW(N, M)
+;;     There are N micro-operations within an instruction that loads multiple
+;;     words. The result produced by the M-th micro-operation is sent to
+;;     consumers. The result is ready at MM.
+;;   MUL, MAC
+;;     Compute data in the multiply-adder and produce the data. The result
+;;     is ready at MM.
+;;   DIV
+;;     Compute data in the divider and produce the data. The result is ready
+;;     at MM.
+;;
+;; Consumers (RHS)
+;;   ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU
+;;     Require operands at EX.
+;;   ALU_SHIFT_Rb
+;;     An ALU-SHIFT instruction consists of a shift micro-operation followed
+;;     by an arithmetic micro-operation. The operand Rb is used by the first
+;;     micro-operation, and there are some latencies if data dependency occurs.
+;;   MAC_RaRb
+;;     A MAC instruction does multiplication at EX and does accumulation at MM,
+;;     so the operand Rt is required at MM, and operands Ra and Rb are required
+;;     at EX.
+;;   ADDR_IN
+;;     If an instruction requires an address as its input operand, the address
+;;     is required at EX.
+;;   ST
+;;     A store instruction requires its data at MM.
+;;   SMW(N, M)
+;;     There are N micro-operations within an instruction that stores multiple
+;;     words. Each M-th micro-operation requires its data at MM.
+;;   BR
+;;     If a branch instruction is conditional, its input data is required at EX.
+
+;; FPU_ADDR_OUT -> FPU_ADDR_IN
+;; Main pipeline rules don't need this because their default latency is 1.
+(define_bypass 1
+  "nds_n10_fpu_load, nds_n10_fpu_store"
+  "nds_n10_fpu_load, nds_n10_fpu_store"
+  "nds32_n10_ex_to_ex_p"
+)
+
+;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT
+;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
+;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+  "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\
+   nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\
+   nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext"
+  "nds_n10_alu, nds_n10_alu_shift,\
+   nds_n10_pbsad, nds_n10_pbsada,\
+   nds_n10_mul, nds_n10_mac, nds_n10_div,\
+   nds_n10_branch,\
+   nds_n10_load, nds_n10_store,\
+   nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
+   nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
+   nds_n10_load_multiple_7, nds_n10_load_multiple_N,\
+   nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\
+   nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\
+   nds_n10_store_multiple_7, nds_n10_store_multiple_N,\
+   nds_n10_mmu,\
+   nds_n10_dsp_alu, nds_n10_dsp_alu_round,\
+   nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\
+   nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\
+   nds_n10_dsp_wext, nds_n10_dsp_bpick"
+  "nds32_n10_mm_to_ex_p"
+)
+
+;; LMW(N, N)
+;;   -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
+;;      DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+  "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
+   nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
+   nds_n10_load_multiple_7, nds_n10_load_multiple_N"
+  "nds_n10_alu, nds_n10_alu_shift,\
+   nds_n10_pbsad, nds_n10_pbsada,\
+   nds_n10_mul, nds_n10_mac, nds_n10_div,\
+   nds_n10_branch,\
+   nds_n10_load, nds_n10_store,\
+   nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
+   nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
+   nds_n10_load_multiple_7, nds_n10_load_multiple_N,\
+   nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\
+   nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\
+   nds_n10_store_multiple_7, nds_n10_store_multiple_N,\
+   nds_n10_mmu,\
+   nds_n10_dsp_alu, nds_n10_dsp_alu_round,\
+   nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\
+   nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\
+   nds_n10_dsp_wext, nds_n10_dsp_bpick"
+  "nds32_n10_last_load_to_ex_p"
+)
diff --git a/gcc/config/nds32/nds32-n13.md b/gcc/config/nds32/nds32-n13.md
new file mode 100644
index 00000000000..ca7546bc2a7
--- /dev/null
+++ b/gcc/config/nds32/nds32-n13.md
@@ -0,0 +1,401 @@
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ------------------------------------------------------------------------
+;; Define N13 pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_n13_machine")
+
+;; ------------------------------------------------------------------------
+;; Pipeline Stages
+;; ------------------------------------------------------------------------
+;; F1 - Instruction Fetch First
+;;   Instruction Tag/Data Arrays
+;;   ITLB Address Translation
+;;   Branch Target Buffer Prediction
+;; F2 - Instruction Fetch Second
+;;   Instruction Cache Hit Detection
+;;   Cache Way Selection
+;;   Instruction Alignment
+;; I1 - Instruction Issue First / Instruction Decode
+;;   Instruction Cache Replay Triggering
+;;   32/16-Bit Instruction Decode
+;;   Return Address Stack Prediction
+;; I2 - Instruction Issue Second / Register File Access
+;;   Instruction Issue Logic
+;;   Register File Access
+;; E1 - Instruction Execute First / Address Generation / MAC First
+;;   Data Access Address generation
+;;   Multiply Operation
+;; E2 - Instruction Execute Second / Data Access First / MAC Second /
+;;      ALU Execute
+;;   Skewed ALU
+;;   Branch/Jump/Return Resolution
+;;   Data Tag/Data arrays
+;;   DTLB address translation
+;;   Accumulation Operation
+;; E3 - Instruction Execute Third / Data Access Second
+;;   Data Cache Hit Detection
+;;   Cache Way Selection
+;;   Data Alignment
+;; E4 - Instruction Execute Fourth / Write Back
+;;   Interruption Resolution
+;;   Instruction Retire
+;;   Register File Write Back
+
+(define_cpu_unit "n13_i1" "nds32_n13_machine")
+(define_cpu_unit "n13_i2" "nds32_n13_machine")
+(define_cpu_unit "n13_e1" "nds32_n13_machine")
+(define_cpu_unit "n13_e2" "nds32_n13_machine")
+(define_cpu_unit "n13_e3" "nds32_n13_machine")
+(define_cpu_unit "n13_e4" "nds32_n13_machine")
+
+(define_insn_reservation "nds_n13_unknown" 1
+  (and (eq_attr "type" "unknown")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_misc" 1
+  (and (eq_attr "type" "misc")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_mmu" 1
+  (and (eq_attr "type" "mmu")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_alu" 1
+  (and (eq_attr "type" "alu")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_alu_shift" 1
+  (and (eq_attr "type" "alu_shift")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_pbsad" 1
+  (and (eq_attr "type" "pbsad")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_pbsada" 1
+  (and (eq_attr "type" "pbsada")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_load" 1
+  (and (match_test "nds32::load_single_p (insn)")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_store" 1
+  (and (match_test "nds32::store_single_p (insn)")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_1" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_2" 1
+  (and (ior (and (eq_attr "type" "load_multiple")
+		 (eq_attr "combo" "2"))
+	    (match_test "nds32::load_double_p (insn)"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_3" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_4" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_5" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_6" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_7" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_8" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_12" 1
+  (and (and (eq_attr "type" "load_multiple")
+	    (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_1" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "1"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_2" 1
+  (and (ior (and (eq_attr "type" "store_multiple")
+		 (eq_attr "combo" "2"))
+	    (match_test "nds32::store_double_p (insn)"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_3" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "3"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_4" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "4"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_5" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "5"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_6" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "6"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_7" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "7"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_8" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "8"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_12" 1
+  (and (and (eq_attr "type" "store_multiple")
+	    (eq_attr "combo" "12"))
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+;; The multiplier at E1 takes two cycles.
+(define_insn_reservation "nds_n13_mul" 1
+  (and (eq_attr "type" "mul")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_mac" 1
+  (and (eq_attr "type" "mac")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4")
+
+;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2,
+;; so the worst case is 34.
+(define_insn_reservation "nds_n13_div" 1
+  (and (eq_attr "type" "div")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_branch" 1
+  (and (eq_attr "type" "branch")
+       (eq_attr "pipeline_model" "n13"))
+  "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+;; ------------------------------------------------------------------------
+;; Comment Notations and Bypass Rules
+;; ------------------------------------------------------------------------
+;; Producers (LHS)
+;;   LD
+;;     Load data from the memory and produce the loaded data. The result is
+;;     ready at E3.
+;;   LMW(N, M)
+;;     There are N micro-operations within an instruction that loads multiple
+;;     words. The result produced by the M-th micro-operation is sent to
+;;     consumers. The result is ready at E3.
+;;   ADDR_OUT
+;;     Most load/store instructions can produce an address output if updating
+;;     the base register is required. The result is ready at E2, which is
+;;     produced by ALU.
+;;   ALU, ALU_SHIFT, SIMD
+;;     Compute data in ALU and produce the data. The result is ready at E2.
+;;   MUL, MAC
+;;     Compute data in the multiply-adder and produce the data. The result
+;;     is ready at E2.
+;;   DIV
+;;     Compute data in the divider and produce the data. The result is ready
+;;     at E2.
+;;   BR
+;;     Branch-with-link instructions produce a result containing the return
+;;     address. The result is ready at E2.
+;;
+;; Consumers (RHS)
+;;   ALU
+;;     General ALU instructions require operands at E2.
+;;   ALU_E1
+;;     Some special ALU instructions, such as BSE, BSP and MOVD44, require
+;;     their operands at E1.
+;;   MUL, DIV, PBSAD, MMU
+;;     Operands are required at E1.
+;;   PBSADA_Rt, PBSADA_RaRb
+;;     Operands Ra and Rb are required at E1, and the operand Rt is required
+;;     at E2.
+;;   ALU_SHIFT_Rb
+;;     An ALU-SHIFT instruction consists of a shift micro-operation followed
+;;     by an arithmetic micro-operation. The operand Rb is used by the first
+;;     micro-operation, and there are some latencies if data dependency occurs.
+;;   MAC_RaRb
+;;     A MAC instruction does multiplication at E1 and does accumulation at E2,
+;;     so the operand Rt is required at E2, and operands Ra and Rb are required
+;;     at E1.
+;;   ADDR_IN
+;;     If an instruction requires an address as its input operand, the address
+;;     is required at E1.
+;;   ST
+;;     A store instruction requires its data at E2.
+;;   SMW(N, M)
+;;     There are N micro-operations within an instruction that stores multiple
+;;     words. Each M-th micro-operation requires its data at E2.
+;;   BR
+;;     If a branch instruction is conditional, its input data is required at E2.
+
+;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 3
+  "nds_n13_load"
+  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+   nds_n13_mul, nds_n13_mac, nds_n13_div,\
+   nds_n13_mmu,\
+   nds_n13_load, nds_n13_store,\
+   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds32_n13_load_to_e1_p"
+)
+
+;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1)
+(define_bypass 2
+  "nds_n13_load"
+  "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds32_n13_load_to_e2_p"
+)
+
+;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 3
+  "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
+  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+   nds_n13_mul, nds_n13_mac, nds_n13_div,\
+   nds_n13_mmu,\
+   nds_n13_load, nds_n13_store,\
+   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds32_n13_last_load_to_e1_p")
+
+;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1)
+(define_bypass 2
+  "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
+  "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds32_n13_last_load_to_e2_p"
+)
+
+;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 2
+  "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
+  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+   nds_n13_mul, nds_n13_mac, nds_n13_div,\
+   nds_n13_mmu,\
+   nds_n13_load, nds_n13_store,\
+   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds32_n13_last_two_load_to_e1_p")
+
+;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT
+;;   ->  ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 2
+  "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\
+   nds_n13_mul, nds_n13_mac, nds_n13_div,\
+   nds_n13_load, nds_n13_store,\
+   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+   nds_n13_mul, nds_n13_mac, nds_n13_div,\
+   nds_n13_mmu,\
+   nds_n13_load, nds_n13_store,\
+   nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+   nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+   nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+   nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+   nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+   nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+  "nds32_n13_e2_to_e1_p")
diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h
index 5d7e1652749..8d761964439 100644
--- a/gcc/config/nds32/nds32-opts.h
+++ b/gcc/config/nds32/nds32-opts.h
@@ -29,6 +29,7 @@ enum nds32_arch_type
 {
   ARCH_V2,
   ARCH_V3,
+  ARCH_V3J,
   ARCH_V3M,
   ARCH_V3F,
   ARCH_V3S
@@ -42,6 +43,10 @@ enum nds32_cpu_type
   CPU_N8,
   CPU_E8,
   CPU_N9,
+  CPU_N10,
+  CPU_GRAYWOLF,
+  CPU_N12,
+  CPU_N13,
   CPU_SIMPLE
 };
 
@@ -53,6 +58,13 @@ enum nds32_cmodel_type
   CMODEL_LARGE
 };
 
+/* The ICT model defines the address generation strategy.  */
+enum nds32_ict_model_type
+{
+  ICT_MODEL_SMALL,
+  ICT_MODEL_LARGE
+};
+
 /* Multiply instruction configuration.  */
 enum nds32_mul_type
 {
diff --git a/gcc/config/nds32/nds32-peephole2.md b/gcc/config/nds32/nds32-peephole2.md
index a5e77b1dcc7..033f62bae5a 100644
--- a/gcc/config/nds32/nds32-peephole2.md
+++ b/gcc/config/nds32/nds32-peephole2.md
@@ -22,3 +22,139 @@
 ;; Use define_peephole2 to handle possible target-specific optimization.
 
 ;; ------------------------------------------------------------------------
+;; Try to utilize 16-bit instructions by swapping operands if possible.
+;; ------------------------------------------------------------------------
+
+;; Try to make add as add45.
+(define_peephole2
+  [(set (match_operand:QIHISI 0 "register_operand"              "")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand" "")
+		     (match_operand:QIHISI 2 "register_operand" "")))]
+  "reload_completed
+   && TARGET_16_BIT
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && REGNO (operands[0]) != REGNO (operands[1])
+   && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))"
+  [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))])
+
+;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand"    "")
+	(match_operator:SI 1 "nds32_have_33_inst_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "register_operand" "")]))]
+  "reload_completed
+   && TARGET_16_BIT
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0]))
+   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))])
+
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operand:SI 1 "register_operand" ""))
+   (set (match_operand:SI 2 "register_operand" "")
+	(match_operand:SI 3 "register_operand" ""))]
+  "TARGET_16_BIT
+   && !TARGET_ISA_V2
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
+   && ((REGNO (operands[0]) & 0x1) == 0)
+   && ((REGNO (operands[1]) & 0x1) == 0)
+   && (REGNO (operands[0]) + 1) == REGNO (operands[2])
+   && (REGNO (operands[1]) + 1) == REGNO (operands[3])"
+  "movd44\t%0, %1"
+  [(set_attr "type"   "alu")
+   (set_attr "length" "2")])
+
+;; Merge two fcpyss to fcpysd.
+(define_peephole2
+  [(set (match_operand:SF 0 "float_even_register_operand" "")
+	(match_operand:SF 1 "float_even_register_operand" ""))
+   (set (match_operand:SF 2 "float_odd_register_operand"  "")
+	(match_operand:SF 3 "float_odd_register_operand"  ""))]
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+   && REGNO (operands[0]) == REGNO (operands[2]) - 1
+   && REGNO (operands[1]) == REGNO (operands[3]) - 1"
+  [(set (match_dup 4) (match_dup 5))]
+  {
+    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0]));
+    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1]));
+  })
+
+(define_peephole2
+  [(set (match_operand:SF 0 "float_odd_register_operand"  "")
+	(match_operand:SF 1 "float_odd_register_operand"  ""))
+   (set (match_operand:SF 2 "float_even_register_operand" "")
+	(match_operand:SF 3 "float_even_register_operand" ""))]
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+   && REGNO (operands[2]) == REGNO (operands[0]) - 1
+   && REGNO (operands[3]) == REGNO (operands[1]) - 1"
+  [(set (match_dup 4) (match_dup 5))]
+  {
+    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2]));
+    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3]));
+  })
+
+;; ------------------------------------------------------------------------
+;; GCC will prefer [u]divmodsi4 rather than [u]divsi3 even if the remainder
+;; is unused, so we use a split to drop the mod for lower register pressure.
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+	(div:SI (match_operand:SI 1 "register_operand")
+		(match_operand:SI 2 "register_operand")))
+   (set (match_operand:SI 3 "register_operand")
+	(mod:SI (match_dup 1) (match_dup 2)))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+	(div:SI (match_dup 1)
+		(match_dup 2)))])
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+	(udiv:SI (match_operand:SI 1 "register_operand")
+		 (match_operand:SI 2 "register_operand")))
+   (set (match_operand:SI 3 "register_operand")
+	(umod:SI (match_dup 1) (match_dup 2)))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+	(udiv:SI (match_dup 1)
+		 (match_dup 2)))])
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand"))))]
+  "NDS32_EXT_DSP_P ()
+   && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? REGNO (operands[0]) + 1 : REGNO (operands[0]))"
+  [(const_int 1)]
+{
+  rtx highpart = nds32_di_high_part_subreg (operands[0]);
+  emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:DI 0 "nds32_general_register_operand" "")
+	(match_operand:DI 1 "nds32_general_register_operand" ""))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL
+   || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+  HOST_WIDE_INT offset;
+  if (dead_note == NULL_RTX)
+    offset = 0;
+  else
+    offset = 4;
+  operands[0] = simplify_gen_subreg (
+		  SImode, operands[0],
+		  DImode, offset);
+  operands[1] = simplify_gen_subreg (
+		  SImode, operands[1],
+		  DImode, offset);
+})
diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c
index a983238cdbb..53619d22510 100644
--- a/gcc/config/nds32/nds32-pipelines-auxiliary.c
+++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c
@@ -306,6 +306,19 @@ pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg)
   return false;
 }
 
+/* Determine if a latency occurs when the consumer PBSADA_INSN uses the
+   value of DEF_REG in its Rt field.  */
+bool
+pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg)
+{
+  rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn));
+
+  if (rtx_equal_p (def_reg, pbsada_rt))
+    return true;
+
+  return false;
+}
+
 /* Check if INSN is a movd44 insn consuming DEF_REG.  */
 bool
 movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
@@ -335,6 +348,103 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
   return false;
 }
 
+/* Check if INSN is a wext insn consuming DEF_REG.  */
+bool
+wext_odd_dep_p (rtx insn, rtx def_reg)
+{
+  rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0);
+  rtx use_reg = XEXP (shift_rtx, 0);
+  rtx pos_rtx = XEXP (shift_rtx, 1);
+
+  if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx))
+    return true;
+
+  if (GET_MODE (def_reg) == DImode)
+    return reg_overlap_p (def_reg, use_reg);
+
+  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
+  gcc_assert (REG_P (use_reg));
+
+  if (REG_P (def_reg))
+    {
+      if (!TARGET_BIG_ENDIAN)
+	return REGNO (def_reg) == REGNO (use_reg) + 1;
+      else
+	return  REGNO (def_reg) == REGNO (use_reg);
+    }
+
+  if (GET_CODE (def_reg) == SUBREG)
+    {
+      if (!reg_overlap_p (def_reg, use_reg))
+	return false;
+
+      if (!TARGET_BIG_ENDIAN)
+	return SUBREG_BYTE (def_reg) == 4;
+      else
+	return SUBREG_BYTE (def_reg) == 0;
+    }
+
+  return false;
+}
+
+/* Check if INSN is a bpick insn consuming DEF_REG.  */
+bool
+bpick_ra_rb_dep_p (rtx insn, rtx def_reg)
+{
+  rtx ior_rtx = SET_SRC (PATTERN (insn));
+  rtx and1_rtx = XEXP (ior_rtx, 0);
+  rtx and2_rtx = XEXP (ior_rtx, 1);
+  rtx reg1_0 = XEXP (and1_rtx, 0);
+  rtx reg1_1 = XEXP (and1_rtx, 1);
+  rtx reg2_0 = XEXP (and2_rtx, 0);
+  rtx reg2_1 = XEXP (and2_rtx, 1);
+
+  if (GET_CODE (reg1_0) == NOT)
+    {
+      if (rtx_equal_p (reg1_0, reg2_0))
+	return reg_overlap_p (def_reg, reg1_1)
+	       || reg_overlap_p (def_reg, reg2_1);
+
+      if (rtx_equal_p (reg1_0, reg2_1))
+	return reg_overlap_p (def_reg, reg1_1)
+	       || reg_overlap_p (def_reg, reg2_0);
+    }
+
+  if (GET_CODE (reg1_1) == NOT)
+    {
+      if (rtx_equal_p (reg1_1, reg2_0))
+	return reg_overlap_p (def_reg, reg1_0)
+	       || reg_overlap_p (def_reg, reg2_1);
+
+      if (rtx_equal_p (reg1_1, reg2_1))
+	return reg_overlap_p (def_reg, reg1_0)
+	       || reg_overlap_p (def_reg, reg2_0);
+    }
+
+  if (GET_CODE (reg2_0) == NOT)
+    {
+      if (rtx_equal_p (reg2_0, reg1_0))
+	return reg_overlap_p (def_reg, reg2_1)
+	       || reg_overlap_p (def_reg, reg1_1);
+
+      if (rtx_equal_p (reg2_0, reg1_1))
+	return reg_overlap_p (def_reg, reg2_1)
+	       || reg_overlap_p (def_reg, reg1_0);
+    }
+
+  if (GET_CODE (reg2_1) == NOT)
+    {
+      if (rtx_equal_p (reg2_1, reg1_0))
+	return reg_overlap_p (def_reg, reg2_0)
+	       || reg_overlap_p (def_reg, reg1_1);
+
+      if (rtx_equal_p (reg2_1, reg1_1))
+	return reg_overlap_p (def_reg, reg2_0)
+	       || reg_overlap_p (def_reg, reg1_0);
+    }
+
+  gcc_unreachable ();
+}
 } // namespace scheduling
 } // namespace nds32
 
@@ -375,8 +485,7 @@ n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg)
       operations in order to write two registers. We have to check the
       dependency from the producer to the first micro-operation.  */
     case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
 	use_rtx = SET_SRC (parallel_element (consumer, 0));
       else
 	use_rtx = SET_SRC (PATTERN (consumer));
@@ -506,8 +615,7 @@ n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
       operations in order to write two registers. We have to check the
       dependency from the producer to the first micro-operation.  */
     case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
 	use_rtx = SET_SRC (parallel_element (consumer, 0));
       else
 	use_rtx = SET_SRC (PATTERN (consumer));
@@ -606,8 +714,7 @@ n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
       break;
 
     case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
 	use_rtx = SET_SRC (parallel_element (consumer, 0));
       else
 	use_rtx = SET_SRC (PATTERN (consumer));
@@ -706,8 +813,7 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
       We have to check the dependency from the producer to the first
       micro-operation.  */
     case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
 	use_rtx = SET_SRC (parallel_element (consumer, 0));
       else
 	use_rtx = SET_SRC (PATTERN (consumer));
@@ -744,7 +850,316 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
   return false;
 }
 
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+   requiring input operand at EX.  */
+bool
+n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+  rtx use_rtx;
+
+  switch (get_attr_type (consumer))
+    {
+    case TYPE_ALU:
+    case TYPE_PBSAD:
+    case TYPE_MUL:
+    case TYPE_DALU:
+    case TYPE_DALU64:
+    case TYPE_DMUL:
+    case TYPE_DPACK:
+    case TYPE_DINSB:
+    case TYPE_DCMP:
+    case TYPE_DCLIP:
+    case TYPE_DALUROUND:
+      use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_ALU_SHIFT:
+      use_rtx = extract_shift_reg (consumer);
+      break;
+
+    case TYPE_PBSADA:
+      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
+
+    case TYPE_MAC:
+    case TYPE_DMAC:
+      use_rtx = extract_mac_non_acc_rtx (consumer);
+      break;
+
+   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+      results, the quotient and the remainder.  */
+    case TYPE_DIV:
+      if (divmod_p (consumer))
+	use_rtx = SET_SRC (parallel_element (consumer, 0));
+      else
+	use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_DWEXT:
+      return wext_odd_dep_p (consumer, def_reg);
+
+    case TYPE_DBPICK:
+      return bpick_ra_rb_dep_p (consumer, def_reg);
+
+    case TYPE_MMU:
+      if (GET_CODE (PATTERN (consumer)) == SET)
+	use_rtx = SET_SRC (PATTERN (consumer));
+      else
+	return true;
+      break;
+
+    case TYPE_LOAD:
+    case TYPE_STORE:
+      use_rtx = extract_mem_rtx (consumer);
+      break;
+
+    case TYPE_LOAD_MULTIPLE:
+    case TYPE_STORE_MULTIPLE:
+      use_rtx = extract_base_reg (consumer);
+      break;
+
+    case TYPE_BRANCH:
+      use_rtx = PATTERN (consumer);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (reg_overlap_p (def_reg, use_rtx))
+    return true;
+
+  return false;
+}
+
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+   requiring input operand at EX.  */
+bool
+gw_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+  rtx use_rtx;
+
+  switch (get_attr_type (consumer))
+    {
+    case TYPE_ALU:
+    case TYPE_PBSAD:
+    case TYPE_MUL:
+    case TYPE_DALU:
+    case TYPE_DALU64:
+    case TYPE_DMUL:
+    case TYPE_DPACK:
+    case TYPE_DINSB:
+    case TYPE_DCMP:
+    case TYPE_DCLIP:
+    case TYPE_DALUROUND:
+      use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_ALU_SHIFT:
+      use_rtx = extract_shift_reg (consumer);
+      break;
+
+    case TYPE_PBSADA:
+      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
+
+    case TYPE_MAC:
+    case TYPE_DMAC:
+      use_rtx = extract_mac_non_acc_rtx (consumer);
+      break;
+
+   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+      results, the quotient and the remainder.  We have to check the
+      dependency from the producer to the first micro-operation.  */
+    case TYPE_DIV:
+      if (divmod_p (consumer))
+	use_rtx = SET_SRC (parallel_element (consumer, 0));
+      else
+	use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_DWEXT:
+      return wext_odd_dep_p (consumer, def_reg);
+
+    case TYPE_DBPICK:
+      return bpick_ra_rb_dep_p (consumer, def_reg);
+
+    case TYPE_MMU:
+      if (GET_CODE (PATTERN (consumer)) == SET)
+	use_rtx = SET_SRC (PATTERN (consumer));
+      else
+	return true;
+      break;
+
+    case TYPE_LOAD:
+    case TYPE_STORE:
+      use_rtx = extract_mem_rtx (consumer);
+      break;
+
+    case TYPE_LOAD_MULTIPLE:
+    case TYPE_STORE_MULTIPLE:
+      use_rtx = extract_base_reg (consumer);
+      break;
+
+    case TYPE_BRANCH:
+      use_rtx = PATTERN (consumer);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (reg_overlap_p (def_reg, use_rtx))
+    return true;
+
+  return false;
+}
+
+/* Check dependencies from any stage to ALU_E1 (E1).  This is a helper
+   function of n13_consumed_by_e1_dep_p ().  */
+bool
+n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg)
+{
+  rtx unspec_rtx, operand_ra, operand_rb;
+  rtx src_rtx, dst_rtx;
+
+  switch (INSN_CODE (alu_e1_insn))
+    {
+    /* BSP and BSE are supported by built-in functions; the corresponding
+       patterns are formed by UNSPEC RTXs.  We have to handle them
+       individually.  */
+    case CODE_FOR_unspec_bsp:
+    case CODE_FOR_unspec_bse:
+      unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0));
+      gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);
+
+      operand_ra = XVECEXP (unspec_rtx, 0, 0);
+      operand_rb = XVECEXP (unspec_rtx, 0, 1);
+
+      if (rtx_equal_p (def_reg, operand_ra)
+	  || rtx_equal_p (def_reg, operand_rb))
+	return true;
+
+      return false;
+
+    /* Unlike general ALU instructions, MOVD44 requires operands at E1.  */
+    case CODE_FOR_move_di:
+    case CODE_FOR_move_df:
+      src_rtx = SET_SRC (PATTERN (alu_e1_insn));
+      dst_rtx = SET_DEST (PATTERN (alu_e1_insn));
+
+      if (REG_P (dst_rtx) && REG_P (src_rtx)
+	  && rtx_equal_p (src_rtx, def_reg))
+	return true;
+
+      return false;
+
+    default:
+      return false;
+    }
+}
+
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+   requiring input operand at E1.  Because the address generation unit is
+   at E1, the address input should be ready at E1.  Note that the branch
+   target is also a kind of address, so we have to check it.  */
+bool
+n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+  rtx use_rtx;
+
+  switch (get_attr_type (consumer))
+    {
+    /* ALU_E1 */
+    case TYPE_ALU:
+      return n13_alu_e1_insn_dep_reg_p (consumer, def_reg);
+
+    case TYPE_PBSADA:
+      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
+
+    case TYPE_PBSAD:
+    case TYPE_MUL:
+      use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_MAC:
+      use_rtx = extract_mac_non_acc_rtx (consumer);
+      break;
+
+    case TYPE_DIV:
+      if (divmod_p (consumer))
+	use_rtx = SET_SRC (parallel_element (consumer, 0));
+      else
+	use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_MMU:
+      if (GET_CODE (PATTERN (consumer)) == SET)
+	use_rtx = SET_SRC (PATTERN (consumer));
+      else
+	return true;
+      break;
+
+    case TYPE_BRANCH:
+      use_rtx = extract_branch_target_rtx (consumer);
+      break;
+
+    case TYPE_LOAD:
+    case TYPE_STORE:
+      use_rtx = extract_mem_rtx (consumer);
+      break;
+
+    case TYPE_LOAD_MULTIPLE:
+    case TYPE_STORE_MULTIPLE:
+      use_rtx = extract_base_reg (consumer);
+      break;
+
+    default:
+      return false;
+    }
+
+  if (reg_overlap_p (def_reg, use_rtx))
+    return true;
+
+  return false;
+}
+
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+   requiring input operand at E2.  */
+bool
+n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+  rtx use_rtx;
+
+  switch (get_attr_type (consumer))
+    {
+    case TYPE_ALU:
+    case TYPE_STORE:
+      use_rtx = SET_SRC (PATTERN (consumer));
+      break;
+
+    case TYPE_ALU_SHIFT:
+      use_rtx = extract_shift_reg (consumer);
+      break;
+
+    case TYPE_PBSADA:
+      return pbsada_insn_rt_dep_reg_p (consumer, def_reg);
+
+    case TYPE_STORE_MULTIPLE:
+      use_rtx = extract_nth_access_rtx (consumer, 0);
+      break;
+
+    case TYPE_BRANCH:
+      use_rtx = extract_branch_condition_rtx (consumer);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (reg_overlap_p (def_reg, use_rtx))
+    return true;
 
+  return false;
+}
 } // anonymous namespace
 
 /* ------------------------------------------------------------------------ */
@@ -837,8 +1252,7 @@ nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
       break;
 
     case TYPE_DIV:
-      if (INSN_CODE (producer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (producer))
 	def_reg = SET_DEST (parallel_element (producer, 1));
       else
 	def_reg = SET_DEST (PATTERN (producer));
@@ -969,8 +1383,7 @@ nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
       break;
 
     case TYPE_DIV:
-      if (INSN_CODE (producer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (producer))
 	{
 	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
 	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
@@ -1073,8 +1486,7 @@ nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
       results, the quotient and the remainder.  We have to handle them
       individually.  */
     case TYPE_DIV:
-      if (INSN_CODE (producer) == CODE_FOR_divmodsi4
-	  || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (producer))
 	{
 	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
 	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
@@ -1132,4 +1544,245 @@ nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
     return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg);
 }
 
+/* Guard functions for N10 cores.  */
+
+/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN).  */
+bool
+nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  gcc_assert (get_attr_type (producer) == TYPE_FLOAD
+	      || get_attr_type (producer) == TYPE_FSTORE);
+  gcc_assert (get_attr_type (consumer) == TYPE_FLOAD
+	      || get_attr_type (consumer) == TYPE_FSTORE);
+
+  if (!post_update_insn_p (producer))
+    return false;
+
+  return reg_overlap_p (extract_base_reg (producer),
+			extract_mem_rtx (consumer));
+}
+
+/* Check dependencies from MM to EX.  */
+bool
+nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx def_reg;
+
+  switch (get_attr_type (producer))
+    {
+    case TYPE_LOAD:
+    case TYPE_MUL:
+    case TYPE_MAC:
+    case TYPE_DALU64:
+    case TYPE_DMUL:
+    case TYPE_DMAC:
+    case TYPE_DALUROUND:
+    case TYPE_DBPICK:
+    case TYPE_DWEXT:
+      def_reg = SET_DEST (PATTERN (producer));
+      break;
+
+   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+      results, the quotient and the remainder.  We have to handle them
+      individually.  */
+    case TYPE_DIV:
+      if (divmod_p (producer))
+	{
+	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
+	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
+
+	  return (n10_consumed_by_ex_dep_p (consumer, def_reg1)
+		  || n10_consumed_by_ex_dep_p (consumer, def_reg2));
+	}
+
+      def_reg = SET_DEST (PATTERN (producer));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+    return n10_consumed_by_ex_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to EX.  */
+bool
+nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+  return n10_consumed_by_ex_dep_p (consumer, last_def_reg);
+}
+
+/* Guard functions for Graywolf cores.  */
+
+/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN).  */
+bool
+nds32_gw_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  return nds32_n10_ex_to_ex_p (producer, consumer);
+}
+
+/* Check dependencies from MM to EX.  */
+bool
+nds32_gw_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx def_reg;
+
+  switch (get_attr_type (producer))
+    {
+    case TYPE_LOAD:
+    case TYPE_MUL:
+    case TYPE_MAC:
+    case TYPE_DALU64:
+    case TYPE_DMUL:
+    case TYPE_DMAC:
+    case TYPE_DALUROUND:
+    case TYPE_DBPICK:
+    case TYPE_DWEXT:
+      def_reg = SET_DEST (PATTERN (producer));
+      break;
+
+   /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+      results, the quotient and the remainder.  We have to handle them
+      individually.  */
+    case TYPE_DIV:
+      if (divmod_p (producer))
+	{
+	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
+	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
+
+	  return (gw_consumed_by_ex_dep_p (consumer, def_reg1)
+		  || gw_consumed_by_ex_dep_p (consumer, def_reg2));
+	}
+
+      def_reg = SET_DEST (PATTERN (producer));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+    return gw_consumed_by_ex_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to EX.  */
+bool
+nds32_gw_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+  return gw_consumed_by_ex_dep_p (consumer, last_def_reg);
+}
+
+/* Guard functions for N12/N13 cores.  */
+
+/* Check dependencies from E2 to E1.  */
+bool
+nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx def_reg;
+
+  switch (get_attr_type (producer))
+    {
+    /* Only post-update load/store instructions are considered.  These
+       instructions produce their address output at E2.  */
+    case TYPE_LOAD:
+    case TYPE_STORE:
+    case TYPE_LOAD_MULTIPLE:
+    case TYPE_STORE_MULTIPLE:
+      if (!post_update_insn_p (producer))
+	return false;
+
+      def_reg = extract_base_reg (producer);
+      break;
+
+    case TYPE_ALU:
+    case TYPE_ALU_SHIFT:
+    case TYPE_PBSAD:
+    case TYPE_PBSADA:
+    case TYPE_MUL:
+    case TYPE_MAC:
+      def_reg = SET_DEST (PATTERN (producer));
+      break;
+
+    case TYPE_BRANCH:
+      return true;
+
+    case TYPE_DIV:
+      /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+	 results, the quotient and the remainder.  We have to handle them
+	 individually.  */
+      if (divmod_p (producer))
+	{
+	  rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
+	  rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
+
+	  return (n13_consumed_by_e1_dep_p (consumer, def_reg1)
+		  || n13_consumed_by_e1_dep_p (consumer, def_reg2));
+	}
+
+      def_reg = SET_DEST (PATTERN (producer));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return n13_consumed_by_e1_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from Load-Store Unit (E3) to E1.  */
+bool
+nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx def_reg = SET_DEST (PATTERN (producer));
+
+  gcc_assert (get_attr_type (producer) == TYPE_LOAD);
+  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
+
+  return n13_consumed_by_e1_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from Load-Store Unit (E3) to E2.  */
+bool
+nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx def_reg = SET_DEST (PATTERN (producer));
+
+  gcc_assert (get_attr_type (producer) == TYPE_LOAD);
+  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
+
+  return n13_consumed_by_e2_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to E1.  */
+bool
+nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+  return n13_consumed_by_e1_dep_p (consumer, last_def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to E2.  */
+bool
+nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+  return n13_consumed_by_e2_dep_p (consumer, last_def_reg);
+}
+
+/* Check dependencies from LMW(N, N-1) to E1.  */
+bool
+nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx last_two_def_reg = extract_nth_access_reg (producer, -2);
+
+  if (last_two_def_reg == NULL_RTX)
+    return false;
+
+  return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg);
+}
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c
index 5e01430c8e3..b41b6c7f438 100644
--- a/gcc/config/nds32/nds32-predicates.c
+++ b/gcc/config/nds32/nds32-predicates.c
@@ -356,54 +356,57 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p)
 }
 
 /* Function to check if 'bclr' instruction can be used with IVAL.  */
-int
-nds32_can_use_bclr_p (int ival)
+bool
+nds32_can_use_bclr_p (HOST_WIDE_INT ival)
 {
   int one_bit_count;
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
 
   /* Calculate the number of 1-bit of (~ival), if there is only one 1-bit,
      it means the original ival has only one 0-bit,
      So it is ok to perform 'bclr' operation.  */
 
-  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival));
+  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival) & mask);
 
   /* 'bclr' is a performance extension instruction.  */
   return (TARGET_EXT_PERF && (one_bit_count == 1));
 }
 
 /* Function to check if 'bset' instruction can be used with IVAL.  */
-int
-nds32_can_use_bset_p (int ival)
+bool
+nds32_can_use_bset_p (HOST_WIDE_INT ival)
 {
   int one_bit_count;
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
 
   /* Caculate the number of 1-bit of ival, if there is only one 1-bit,
      it is ok to perform 'bset' operation.  */
 
-  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival));
+  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask);
 
   /* 'bset' is a performance extension instruction.  */
   return (TARGET_EXT_PERF && (one_bit_count == 1));
 }
 
 /* Function to check if 'btgl' instruction can be used with IVAL.  */
-int
-nds32_can_use_btgl_p (int ival)
+bool
+nds32_can_use_btgl_p (HOST_WIDE_INT ival)
 {
   int one_bit_count;
+  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode);
 
   /* Caculate the number of 1-bit of ival, if there is only one 1-bit,
      it is ok to perform 'btgl' operation.  */
 
-  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival));
+  one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask);
 
   /* 'btgl' is a performance extension instruction.  */
   return (TARGET_EXT_PERF && (one_bit_count == 1));
 }
 
 /* Function to check if 'bitci' instruction can be used with IVAL.  */
-int
-nds32_can_use_bitci_p (int ival)
+bool
+nds32_can_use_bitci_p (HOST_WIDE_INT ival)
 {
   /* If we are using V3 ISA, we have 'bitci' instruction.
      Try to see if we can present 'andi' semantic with
@@ -515,4 +518,117 @@ nds32_const_double_range_ok_p (rtx op, machine_mode mode,
 
   return val >= lower && val < upper;
 }
+
+bool
+nds32_const_unspec_p (rtx x)
+{
+  if (GET_CODE (x) == CONST)
+    {
+      x = XEXP (x, 0);
+
+      if (GET_CODE (x) == PLUS)
+	x = XEXP (x, 0);
+
+      if (GET_CODE (x) == UNSPEC)
+	{
+	  switch (XINT (x, 1))
+	    {
+	    case UNSPEC_GOTINIT:
+	    case UNSPEC_GOT:
+	    case UNSPEC_GOTOFF:
+	    case UNSPEC_PLT:
+	    case UNSPEC_TLSGD:
+	    case UNSPEC_TLSLD:
+	    case UNSPEC_TLSIE:
+	    case UNSPEC_TLSLE:
+	      return false;
+	    default:
+	      return true;
+	    }
+	}
+    }
+
+  if (GET_CODE (x) == SYMBOL_REF
+      && SYMBOL_REF_TLS_MODEL (x))
+    return false;
+
+  return true;
+}
+
+/* Pack the elements of the V2HI/V4QI CONST_VECTOR OP into one integer.
+   The element taken first lands in the low bits; on big-endian targets
+   the elements are taken in reverse order.  */
+HOST_WIDE_INT
+const_vector_to_hwint (rtx op)
+{
+  HOST_WIDE_INT mask, hwint = 0;
+  int i, nelem, shift_adv, shift = 0;
+
+  switch (GET_MODE (op))
+    {
+      case E_V2HImode:
+	mask = 0xffff;
+	shift_adv = 16;
+	nelem = 2;
+	break;
+      case E_V4QImode:
+	mask = 0xff;
+	shift_adv = 8;
+	nelem = 4;
+	break;
+      default:
+	gcc_unreachable ();
+    }
+
+  /* Elements are CONST_INTs, so read them with INTVAL, not XINT.  */
+  if (TARGET_BIG_ENDIAN)
+    {
+      for (i = 0; i < nelem; ++i)
+	{
+	  HOST_WIDE_INT val = INTVAL (XVECEXP (op, 0, nelem - i - 1));
+	  hwint |= (val & mask) << shift;
+	  shift = shift + shift_adv;
+	}
+    }
+  else
+    {
+      for (i = 0; i < nelem; ++i)
+	{
+	  HOST_WIDE_INT val = INTVAL (XVECEXP (op, 0, i));
+	  hwint |= (val & mask) << shift;
+	  shift = shift + shift_adv;
+	}
+    }
+
+  return hwint;
+}
+
+bool
+nds32_valid_CVp5_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16));
+}
+
+bool
+nds32_valid_CVs5_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival < (1 << 4)) && (ival >= -(1 << 4));
+}
+
+bool
+nds32_valid_CVs2_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival < (1 << 19)) && (ival >= -(1 << 19));
+}
+
+bool
+nds32_valid_CVhi_p (rtx op)
+{
+  HOST_WIDE_INT ival = const_vector_to_hwint (op);
+  return (ival != 0) && ((ival & 0xfff) == 0);
+}
+
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h
index e7b7d4170cc..7fb2315d0ff 100644
--- a/gcc/config/nds32/nds32-protos.h
+++ b/gcc/config/nds32/nds32-protos.h
@@ -69,9 +69,10 @@ extern unsigned int nds32_dbx_register_number (unsigned int);
 
 /* ------------------------------------------------------------------------ */
 
-/* Auxiliary functions for lwm/smw.  */
+/* Auxiliary functions for manipulating DI mode.  */
 
-extern bool nds32_valid_smw_lwm_base_p (rtx);
+extern rtx nds32_di_high_part_subreg(rtx);
+extern rtx nds32_di_low_part_subreg(rtx);
 
 /* Auxiliary functions for expanding rtl used in nds32-multiple.md.  */
 
@@ -116,6 +117,20 @@ extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *);
 extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *);
 extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *);
 
+extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *);
+
+extern bool nds32_gw_ex_to_ex_p (rtx_insn *, rtx_insn *);
+extern bool nds32_gw_mm_to_ex_p (rtx_insn *, rtx_insn *);
+extern bool nds32_gw_last_load_to_ex_p (rtx_insn *, rtx_insn *);
+
+extern bool nds32_n13_e2_to_e1_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n13_load_to_e1_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n13_load_to_e2_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n13_last_load_to_e1_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n13_last_load_to_e2_p (rtx_insn *, rtx_insn *);
+extern bool nds32_n13_last_two_load_to_e1_p (rtx_insn *, rtx_insn *);
 
 /* Auxiliary functions for stack operation predicate checking.  */
 
@@ -123,24 +138,25 @@ extern bool nds32_valid_stack_push_pop_p (rtx, bool);
 
 /* Auxiliary functions for bit operation detection.  */
 
-extern int nds32_can_use_bclr_p (int);
-extern int nds32_can_use_bset_p (int);
-extern int nds32_can_use_btgl_p (int);
+extern bool nds32_can_use_bclr_p (HOST_WIDE_INT);
+extern bool nds32_can_use_bset_p (HOST_WIDE_INT);
+extern bool nds32_can_use_btgl_p (HOST_WIDE_INT);
 
-extern int nds32_can_use_bitci_p (int);
+extern bool nds32_can_use_bitci_p (HOST_WIDE_INT);
 
 extern bool nds32_const_double_range_ok_p (rtx, machine_mode,
 					   HOST_WIDE_INT, HOST_WIDE_INT);
 
+extern bool nds32_const_unspec_p (rtx x);
+
 /* Auxiliary function for 'Computing the Length of an Insn'.  */
 
 extern int nds32_adjust_insn_length (rtx_insn *, int);
 
 /* Auxiliary functions for FP_AS_GP detection.  */
 
-extern int nds32_fp_as_gp_check_available (void);
-
 extern bool nds32_symbol_load_store_p (rtx_insn *);
+extern bool nds32_naked_function_p (tree);
 
 /* Auxiliary functions for jump table generation.  */
 
@@ -159,10 +175,50 @@ extern void nds32_expand_float_cstore (rtx *);
 extern enum nds32_expand_result_type nds32_expand_movcc (rtx *);
 extern void nds32_expand_float_movcc (rtx *);
 
+/* Auxiliary functions for expand extv/insv instruction.  */
+
+extern enum nds32_expand_result_type nds32_expand_extv (rtx *);
+extern enum nds32_expand_result_type nds32_expand_insv (rtx *);
+
+/* Auxiliary functions for expand PIC instruction.  */
+
+extern void nds32_expand_pic_move (rtx *);
+
+/* Auxiliary functions to legitimize PIC address.  */
+
+extern rtx nds32_legitimize_pic_address (rtx);
+
+/* Auxiliary functions for expand TLS instruction.  */
+
+extern void nds32_expand_tls_move (rtx *);
+
+/* Auxiliary functions to legitimize TLS address.  */
+
+extern rtx nds32_legitimize_tls_address (rtx);
+
+/* Auxiliary functions to identify thread-local symbol.  */
+
+extern bool nds32_tls_referenced_p (rtx);
+
+/* Auxiliary functions for expand ICT instruction.  */
+
+extern void nds32_expand_ict_move (rtx *);
+
+/* Auxiliary functions to legitimize address for indirect-call symbol.  */
+
+extern rtx nds32_legitimize_ict_address (rtx);
+
+/* Auxiliary functions to identify indirect-call symbol.  */
+
+extern bool nds32_indirect_call_referenced_p (rtx);
 
 /* Auxiliary functions to identify long-call symbol.  */
 extern bool nds32_long_call_p (rtx);
 
+/* Auxiliary functions to identify SYMBOL_REF and LABEL_REF pattern.  */
+
+extern bool symbolic_reference_mentioned_p (rtx);
+
 /* Auxiliary functions to identify conditional move comparison operand.  */
 
 extern int nds32_cond_move_p (rtx);
@@ -185,6 +241,7 @@ extern const char *nds32_output_32bit_load_s (rtx *, int);
 extern const char *nds32_output_float_load(rtx *);
 extern const char *nds32_output_float_store(rtx *);
 extern const char *nds32_output_smw_single_word (rtx *);
+extern const char *nds32_output_smw_double_word (rtx *);
 extern const char *nds32_output_lmw_single_word (rtx *);
 extern const char *nds32_output_double (rtx *, bool);
 extern const char *nds32_output_cbranchsi4_equality_zero (rtx_insn *, rtx *);
@@ -193,9 +250,12 @@ extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *
 								      rtx *);
 extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *);
 
+extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool);
+
 extern const char *nds32_output_call (rtx, rtx *, rtx,
 				      const char *, const char *, bool);
-
+extern const char *nds32_output_tls_desc (rtx *);
+extern const char *nds32_output_tls_ie (rtx *);
 
 /* Auxiliary functions to output stack push/pop instruction.  */
 
@@ -203,9 +263,19 @@ extern const char *nds32_output_stack_push (rtx);
 extern const char *nds32_output_stack_pop (rtx);
 extern const char *nds32_output_return (void);
 
+
+/* Auxiliary functions to split/output sms pattern.  */
+extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx);
+extern const char *nds32_output_sms (rtx, rtx, rtx, rtx);
+extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+
 /* Auxiliary functions to split double word RTX pattern.  */
 
 extern void nds32_spilt_doubleword (rtx *, bool);
+extern void nds32_split_ashiftdi3 (rtx, rtx, rtx);
+extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx);
+extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx);
+extern void nds32_split_rotatertdi3 (rtx, rtx, rtx);
 
 /* Auxiliary functions to split large constant RTX pattern.  */
 
@@ -237,15 +307,29 @@ extern void nds32_construct_isr_vectors_information (tree, const char *);
 extern void nds32_asm_file_start_for_isr (void);
 extern void nds32_asm_file_end_for_isr (void);
 extern bool nds32_isr_function_p (tree);
+extern bool nds32_isr_function_critical_p (tree);
 
 /* Auxiliary functions for cost calculation.  */
 
+extern void nds32_init_rtx_costs (void);
 extern bool nds32_rtx_costs_impl (rtx, machine_mode, int, int, int *, bool);
 extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool);
 
 /* Auxiliary functions for pre-define marco.  */
 extern void nds32_cpu_cpp_builtins(struct cpp_reader *);
 
+/* Auxiliary functions for const_vector's constraints.  */
+
+extern HOST_WIDE_INT const_vector_to_hwint (rtx);
+extern bool nds32_valid_CVp5_p (rtx);
+extern bool nds32_valid_CVs5_p (rtx);
+extern bool nds32_valid_CVs2_p (rtx);
+extern bool nds32_valid_CVhi_p (rtx);
+
+/* Auxiliary functions for lwm/smw.  */
+
+extern bool nds32_valid_smw_lwm_base_p (rtx);
+
 extern bool nds32_split_double_word_load_store_p (rtx *,bool);
 
 namespace nds32 {
@@ -258,11 +342,13 @@ bool load_single_p (rtx_insn *);
 bool store_single_p (rtx_insn *);
 bool load_double_p (rtx_insn *);
 bool store_double_p (rtx_insn *);
+bool store_offset_reg_p (rtx_insn *);
 bool post_update_insn_p (rtx_insn *);
 bool immed_offset_p (rtx);
 int find_post_update_rtx (rtx_insn *);
 rtx extract_mem_rtx (rtx_insn *);
 rtx extract_base_reg (rtx_insn *);
+rtx extract_offset_rtx (rtx_insn *);
 
 rtx extract_shift_reg (rtx);
 
@@ -271,6 +357,8 @@ rtx extract_movd44_odd_reg (rtx_insn *);
 
 rtx extract_mac_non_acc_rtx (rtx_insn *);
 
+bool divmod_p (rtx_insn *);
+
 rtx extract_branch_target_rtx (rtx_insn *);
 rtx extract_branch_condition_rtx (rtx_insn *);
 } // namespace nds32
@@ -279,5 +367,6 @@ extern bool nds32_use_load_post_increment(machine_mode);
 
 /* Functions for create nds32 specific optimization pass.  */
 extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *);
+extern rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *);
 
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-relax-opt.c b/gcc/config/nds32/nds32-relax-opt.c
index 0349be4725d..e54bd978c2e 100644
--- a/gcc/config/nds32/nds32-relax-opt.c
+++ b/gcc/config/nds32/nds32-relax-opt.c
@@ -52,6 +52,8 @@
 #include "cfgrtl.h"
 #include "tree-pass.h"
 
+using namespace nds32;
+
 /* This is used to create unique relax hint id value.
    The initial value is 0.  */
 static int relax_group_id = 0;
@@ -185,6 +187,121 @@ nds32_plus_reg_load_store_p (rtx_insn *insn)
   return false;
 }
 
+/* Return true if X is a CONST whose operand references an ict symbol.  */
+static bool
+nds32_ict_const_p (rtx x)
+{
+  /* Anything that is not wrapped in a CONST is rejected outright.  */
+  if (GET_CODE (x) != CONST)
+    return FALSE;
+
+  /* Strip the CONST and test the wrapped expression.  */
+  return nds32_indirect_call_referenced_p (XEXP (x, 0));
+}
+
+/* Group the following pattern as relax candidates:
+
+   GOT:
+      sethi	$ra, hi20(sym)
+      ori	$ra, $ra, lo12(sym)
+      lw	$rb, [$ra + $gp]
+
+   GOTOFF, TLSLE:
+      sethi	$ra, hi20(sym)
+      ori	$ra, $ra, lo12(sym)
+      LS	$rb, [$ra + $gp]
+
+   GOTOFF, TLSLE:
+      sethi	$ra, hi20(sym)
+      ori	$ra, $ra, lo12(sym)
+      add	$rb, $ra, $gp($tp)
+
+   Initial GOT table:
+      sethi	$gp,hi20(sym)
+      ori	$gp, $gp, lo12(sym)
+      add5.pc	$gp  */
+
+static auto_vec<rtx_insn *, 32> nds32_group_infos;
+/* Collect the uses of DEF_INSN that can be relax-grouped for PIC/TLS.  */
+static bool
+nds32_pic_tls_group (rtx_insn *def_insn,
+		     enum nds32_relax_insn_type relax_type,
+		     int sym_type)
+{
+  df_ref def_record;
+  df_link *link;
+  rtx_insn *use_insn = NULL;
+  rtx pat, new_pat;
+  def_record = DF_INSN_DEFS (def_insn);
+  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
+    {
+      if (!DF_REF_INSN_INFO (link->ref))
+	continue;
+
+      use_insn = DF_REF_INSN (link->ref);
+
+      /* Give up unless USE_INSN's block is dominated by DEF_INSN's block.  */
+      if (!dominated_by_p (CDI_DOMINATORS,
+			   BLOCK_FOR_INSN (use_insn),
+			   BLOCK_FOR_INSN (def_insn)))
+	return FALSE;
+
+      /* Likewise give up if USE_INSN is not an active insn.  */
+      if (!active_insn_p (use_insn))
+	return FALSE;
+
+      switch (relax_type)
+	{
+	case RELAX_ORI:
+	  /* GOTOFF, TLSLE:
+	     sethi	$ra, hi20(sym)
+	     ori	$ra, $ra, lo12(sym)
+	     add	$rb, $ra, $gp($tp)
+	     The add's source is rewritten in place into an UNSPEC_ADD32.  */
+	  if ((sym_type == UNSPEC_TLSLE
+	       || sym_type == UNSPEC_GOTOFF)
+	      && (recog_memoized (use_insn) == CODE_FOR_addsi3))
+	    {
+	      pat = XEXP (PATTERN (use_insn), 1);
+	      new_pat =
+		gen_rtx_UNSPEC (SImode,
+				gen_rtvec (2, XEXP (pat, 0), XEXP (pat, 1)),
+				UNSPEC_ADD32);
+	      validate_replace_rtx (pat, new_pat, use_insn);
+	      nds32_group_infos.safe_push (use_insn);
+	    }
+	  else if (nds32_plus_reg_load_store_p (use_insn)
+		   && !nds32_sp_base_or_plus_load_store_p (use_insn))
+	    nds32_group_infos.safe_push (use_insn);
+	  else
+	    return FALSE;
+	  break;
+
+	default:
+	  return FALSE;
+	}
+    }
+  return TRUE;
+}
+
+/* Return the integer in operand 1 of the symbol rtx held in operand 1 of
+   insn X's SET_SRC, after stripping an optional CONST and an optional
+   PLUS inside it.  The caller compares the result with UNSPEC_* codes.  */
+static int
+nds32_pic_tls_symbol_type (rtx x)
+{
+  rtx sym = XEXP (SET_SRC (PATTERN (x)), 1);
+
+  if (GET_CODE (sym) == CONST)
+    {
+      sym = XEXP (sym, 0);
+      if (GET_CODE (sym) == PLUS)
+	sym = XEXP (sym, 0);
+    }
+
+  return XINT (sym, 1);
+}
+
 /* Group the relax candidates with group id.  */
 static void
 nds32_group_insns (rtx sethi)
@@ -193,6 +310,7 @@ nds32_group_insns (rtx sethi)
   df_link *link;
   rtx_insn *use_insn = NULL;
   rtx group_id;
+  bool valid;
 
   def_record = DF_INSN_DEFS (sethi);
 
@@ -242,6 +360,132 @@ nds32_group_insns (rtx sethi)
       /* Insert .relax_* directive.  */
       if (active_insn_p (use_insn))
 	emit_insn_before (gen_relax_group (group_id), use_insn);
+
+      /* Find ori ra, ra, unspec(symbol) instruction.  */
+      if (use_insn != NULL
+	  && recog_memoized (use_insn) == CODE_FOR_lo_sum
+	  && !nds32_const_unspec_p (XEXP (SET_SRC (PATTERN (use_insn)), 1)))
+	{
+	  int sym_type = nds32_pic_tls_symbol_type (use_insn);
+	  valid = nds32_pic_tls_group (use_insn, RELAX_ORI, sym_type);
+
+	  /* Insert .relax_* directive.  */
+	  while (!nds32_group_infos.is_empty ())
+	    {
+	      use_insn = nds32_group_infos.pop ();
+	      if (valid)
+		emit_insn_before (gen_relax_group (group_id), use_insn);
+	    }
+	}
+    }
+
+  relax_group_id++;
+}
+
+/* Stamp the current relax group id into a TLS insn (the callers pass
+   tls_ie and tls_desc insns) and advance the id.  Element 1 of INSN's
+   pattern vector is expected to wrap the relax-group unspec, whose first
+   operand is overwritten with the id.  */
+
+static void
+nds32_group_tls_insn (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+  rtx unspec_relax_group = XEXP (XVECEXP (pat, 0, 1), 0);
+
+  /* Only rewrite the operand when it really is the relax-group marker;
+     the group id is consumed either way.  */
+  if (GET_CODE (unspec_relax_group) == UNSPEC
+      && XINT (unspec_relax_group, 1) == UNSPEC_VOLATILE_RELAX_GROUP)
+    {
+      XVECEXP (unspec_relax_group, 0, 0) = GEN_INT (relax_group_id);
+    }
+
+  relax_group_id++;
+}
+
+/* Return true if INSN is a float load (type FLOAD) into an SFmode or DFmode
+   destination from [$ra] or [$ra + const].  No store type is matched.  */
+static bool
+nds32_float_reg_load_store_p (rtx_insn *insn)
+{
+  rtx pat = PATTERN (insn);
+
+  if (get_attr_type (insn) != TYPE_FLOAD
+      || GET_CODE (pat) != SET
+      || (GET_MODE (XEXP (pat, 0)) != SFmode
+	  && GET_MODE (XEXP (pat, 0)) != DFmode)
+      || !MEM_P (XEXP (pat, 1)))
+    return false;
+
+  rtx addr = XEXP (XEXP (pat, 1), 0);
+
+  /* [$ra] */
+  if (REG_P (addr))
+    return true;
+  /* [$ra + offset] */
+  return (GET_CODE (addr) == PLUS
+	  && REG_P (XEXP (addr, 0))
+	  && CONST_INT_P (XEXP (addr, 1)));
+}
+
+
+/* Group float load-store instructions:
+   la $ra, symbol
+   flsi $rt, [$ra + offset] */
+
+static void
+nds32_group_float_insns (rtx insn)
+{
+  df_ref def_record, use_record;
+  df_link *link;
+  rtx_insn *use_insn = NULL;
+  rtx group_id;
+
+  def_record = DF_INSN_DEFS (insn);
+
+  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
+    {
+      if (!DF_REF_INSN_INFO (link->ref))
+	continue;
+
+      use_insn = DF_REF_INSN (link->ref);
+
+      /* Skip if define insn and use insn not in the same basic block.  */
+      if (!dominated_by_p (CDI_DOMINATORS,
+			   BLOCK_FOR_INSN (use_insn),
+			   BLOCK_FOR_INSN (insn)))
+	return;
+
+      /* Skip if the low-part used register is from different high-part
+	 instructions.  */
+      use_record = DF_INSN_USES (use_insn);
+      if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next)
+	return;
+
+      /* Skip if use_insn not active insn.  */
+      if (!active_insn_p (use_insn))
+	return;
+
+      if (!nds32_float_reg_load_store_p (use_insn)
+	  || find_post_update_rtx (use_insn) != -1)
+	return;
+    }
+
+  group_id = GEN_INT (relax_group_id);
+  /* Insert .relax_* directive for insn.  */
+  emit_insn_before (gen_relax_group (group_id), insn);
+
+  /* Scan the use insns and insert the directive.  */
+  for (link = DF_REF_CHAIN (def_record); link; link = link->next)
+    {
+      if (!DF_REF_INSN_INFO (link->ref))
+	continue;
+
+      use_insn = DF_REF_INSN (link->ref);
+
+      /* Insert .relax_* directive.  */
+	emit_insn_before (gen_relax_group (group_id), use_insn);
     }
 
   relax_group_id++;
@@ -271,8 +515,21 @@ nds32_relax_group (void)
 	  /* Find sethi ra, symbol  instruction.  */
 	  if (recog_memoized (insn) == CODE_FOR_sethi
 	      && nds32_symbolic_operand (XEXP (SET_SRC (PATTERN (insn)), 0),
-					 SImode))
+					 SImode)
+	      && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0)))
 	    nds32_group_insns (insn);
+	  else if (recog_memoized (insn) == CODE_FOR_tls_ie)
+	    nds32_group_tls_insn (insn);
+	  else if (TARGET_FPU_SINGLE
+		   && recog_memoized (insn) == CODE_FOR_move_addr
+		   && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0)))
+	    {
+	      nds32_group_float_insns (insn);
+	    }
+	}
+      else if (CALL_P (insn) && recog_memoized (insn) == CODE_FOR_tls_desc)
+	{
+	  nds32_group_tls_insn (insn);
 	}
     }
 
diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c
index b0151be39dc..7c93cd2edd0 100644
--- a/gcc/config/nds32/nds32-utils.c
+++ b/gcc/config/nds32/nds32-utils.c
@@ -142,6 +142,23 @@ store_double_p (rtx_insn *insn)
   return true;
 }
 
+/* Return true if INSN is a store insn whose address offset, as reported
+   by extract_offset_rtx, is a register.  */
+bool
+store_offset_reg_p (rtx_insn *insn)
+{
+  /* Only insns of type STORE qualify.  */
+  if (get_attr_type (insn) != TYPE_STORE)
+    return false;
+
+  /* NULL_RTX here means the address carries no offset part.  */
+  rtx offset = extract_offset_rtx (insn);
+  if (offset == NULL_RTX)
+    return false;
+
+  return REG_P (offset);
+}
+
 /* Determine if INSN is a post update insn.  */
 bool
 post_update_insn_p (rtx_insn *insn)
@@ -316,22 +333,114 @@ extract_base_reg (rtx_insn *insn)
   if (REG_P (XEXP (mem_rtx, 0)))
     return XEXP (mem_rtx, 0);
 
+  /* (mem (lo_sum (reg) (symbol_ref)) */
+  if (GET_CODE (XEXP (mem_rtx, 0)) == LO_SUM)
+    return XEXP (XEXP (mem_rtx, 0), 0);
+
   plus_rtx = XEXP (mem_rtx, 0);
 
   if (GET_CODE (plus_rtx) == SYMBOL_REF
       || GET_CODE (plus_rtx) == CONST)
     return NULL_RTX;
 
-  gcc_assert (GET_CODE (plus_rtx) == PLUS
-	      || GET_CODE (plus_rtx) == POST_INC
-	      || GET_CODE (plus_rtx) == POST_DEC
-	      || GET_CODE (plus_rtx) == POST_MODIFY);
-  gcc_assert (REG_P (XEXP (plus_rtx, 0)));
   /* (mem (plus (reg) (const_int))) or
+     (mem (plus (mult (reg) (const_int 4)) (reg))) or
      (mem (post_inc (reg))) or
      (mem (post_dec (reg))) or
      (mem (post_modify (reg) (plus (reg) (reg))))  */
-  return XEXP (plus_rtx, 0);
+  gcc_assert (GET_CODE (plus_rtx) == PLUS
+	      || GET_CODE (plus_rtx) == POST_INC
+	      || GET_CODE (plus_rtx) == POST_DEC
+	      || GET_CODE (plus_rtx) == POST_MODIFY);
+
+  if (REG_P (XEXP (plus_rtx, 0)))
+    return XEXP (plus_rtx, 0);
+
+  gcc_assert (REG_P (XEXP (plus_rtx, 1)));
+  return XEXP (plus_rtx, 1);
+}
+
+/* Extract the offset rtx of the address used by load/store INSN.  Return
+   NULL_RTX if there is no offset (or INSN is a multiple load/store).  */
+rtx
+extract_offset_rtx (rtx_insn *insn)
+{
+  rtx mem_rtx;
+  rtx plus_rtx;
+  rtx offset_rtx;
+
+  /* Find the MEM rtx.  The multiple load/store insns don't have
+     the offset field so we can return NULL_RTX here.  */
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LOAD_MULTIPLE:
+    case TYPE_STORE_MULTIPLE:
+      return NULL_RTX;
+
+    case TYPE_LOAD:
+    case TYPE_FLOAD:
+    case TYPE_STORE:
+    case TYPE_FSTORE:
+      mem_rtx = extract_mem_rtx (insn);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  gcc_assert (MEM_P (mem_rtx));
+
+  /* (mem (reg)): a bare base register has no offset.  */
+  if (REG_P (XEXP (mem_rtx, 0)))
+    return NULL_RTX;
+
+  plus_rtx = XEXP (mem_rtx, 0);
+
+  switch (GET_CODE (plus_rtx))
+    {
+    case SYMBOL_REF:
+    case CONST:
+    case POST_INC:
+    case POST_DEC:
+      return NULL_RTX;
+
+    case PLUS:
+      /* (mem (plus (reg) (const_int))) or
+	 (mem (plus (mult (reg) (const_int 4)) (reg)))  */
+      if (REG_P (XEXP (plus_rtx, 0)))
+	offset_rtx = XEXP (plus_rtx, 1);
+      else
+	{
+	  gcc_assert (REG_P (XEXP (plus_rtx, 1)));
+	  offset_rtx = XEXP (plus_rtx, 0);
+	}
+
+      if (ARITHMETIC_P (offset_rtx))
+	{
+	  gcc_assert (GET_CODE (offset_rtx) == MULT);
+	  gcc_assert (REG_P (XEXP (offset_rtx, 0)));
+	  offset_rtx = XEXP (offset_rtx, 0);
+	}
+      break;
+
+    case LO_SUM:
+      /* (mem (lo_sum (reg) (symbol_ref)): the symbol is the offset.  */
+      offset_rtx = XEXP (plus_rtx, 1);
+      break;
+
+    case POST_MODIFY:
+      /* (mem (post_modify (reg) (plus (reg) (reg / const_int)))) */
+      gcc_assert (REG_P (XEXP (plus_rtx, 0)));
+      plus_rtx = XEXP (plus_rtx, 1);
+      gcc_assert (GET_CODE (plus_rtx) == PLUS);
+      offset_rtx = XEXP (plus_rtx, 1);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return offset_rtx;
 }
 
 /* Extract the register of the shift operand from an ALU_SHIFT rtx.  */
@@ -413,6 +522,7 @@ extract_mac_non_acc_rtx (rtx_insn *insn)
   switch (get_attr_type (insn))
     {
     case TYPE_MAC:
+    case TYPE_DMAC:
       if (REG_P (XEXP (exp, 0)))
 	return XEXP (exp, 1);
       else
@@ -423,6 +533,19 @@ extract_mac_non_acc_rtx (rtx_insn *insn)
     }
 }
 
+/* Return true if INSN, which must have type DIV, is one of the combined
+   divide-and-modulo patterns (divmodsi4 or udivmodsi4); per the original
+   note these are the DIV insns that need two write ports.  */
+bool
+divmod_p (rtx_insn *insn)
+{
+  gcc_assert (get_attr_type (insn) == TYPE_DIV);
+
+  const int icode = INSN_CODE (insn);
+
+  return icode == CODE_FOR_divmodsi4 || icode == CODE_FOR_udivmodsi4;
+}
+
 /* Extract the rtx representing the branch target to help recognize
    data hazards.  */
 rtx
diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c
index 8994c13d7b0..1d23ec3fb9a 100644
--- a/gcc/config/nds32/nds32.c
+++ b/gcc/config/nds32/nds32.c
@@ -305,6 +305,7 @@ static const struct attribute_spec nds32_attribute_table[] =
   { "nested",       0,  0, false, false, false, false, NULL, NULL },
   { "not_nested",   0,  0, false, false, false, false, NULL, NULL },
   { "nested_ready", 0,  0, false, false, false, false, NULL, NULL },
+  { "critical",     0,  0, false, false, false, false, NULL, NULL },
 
   /* The attributes describing isr register save scheme.  */
   { "save_all",     0,  0, false, false, false, false, NULL, NULL },
@@ -314,9 +315,19 @@ static const struct attribute_spec nds32_attribute_table[] =
   { "nmi",          1,  1, false, false, false, false, NULL, NULL },
   { "warm",         1,  1, false, false, false, false, NULL, NULL },
 
+  /* The attribute describing the isr security level.  */
+  { "secure",       1,  1, false, false, false, false, NULL, NULL },
+
   /* The attribute telling no prologue/epilogue.  */
   { "naked",        0,  0, false, false, false, false, NULL, NULL },
 
+  /* The attribute is used to tell this function to be ROM patch.  */
+  { "indirect_call",0,  0, false, false, false, false, NULL, NULL },
+
+  /* FOR BACKWARD COMPATIBILITY,
+     this attribute also tells no prologue/epilogue.  */
+  { "no_prologue",  0,  0, false, false, false, false, NULL, NULL },
+
   /* The last attribute spec is set to be NULL.  */
   { NULL,           0,  0, false, false, false, false, NULL, NULL }
 };
@@ -345,6 +356,10 @@ nds32_init_machine_status (void)
   /* Initially this function is not under strictly aligned situation.  */
   machine->strict_aligned_p = 0;
 
+  /* Initially this function has no naked and no_prologue attributes.  */
+  machine->attr_naked_p = 0;
+  machine->attr_no_prologue_p = 0;
+
   return machine;
 }
 
@@ -362,6 +377,15 @@ nds32_compute_stack_frame (void)
      needs prologue/epilogue.  */
   cfun->machine->naked_p = 0;
 
+  /* We need to mark whether this function has naked and no_prologue
+     attribute so that we can distinguish the difference if users applies
+     -mret-in-naked-func option.  */
+  cfun->machine->attr_naked_p
+    = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
+      ? 1 : 0;
+  cfun->machine->attr_no_prologue_p
+    = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl))
+      ? 1 : 0;
 
   /* If __builtin_eh_return is used, we better have frame pointer needed
      so that we can easily locate the stack slot of return address.  */
@@ -432,7 +456,8 @@ nds32_compute_stack_frame (void)
 
   /* If $gp value is required to be saved on stack, it needs 4 bytes space.
      Check whether we are using PIC code genration.  */
-  cfun->machine->gp_size = (flag_pic) ? 4 : 0;
+  cfun->machine->gp_size =
+    (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0;
 
   /* If $lp value is required to be saved on stack, it needs 4 bytes space.
      Check whether $lp is ever live.  */
@@ -497,7 +522,7 @@ nds32_compute_stack_frame (void)
     }
 
   /* Check if this function can omit prologue/epilogue code fragment.
-     If there is 'naked' attribute in this function,
+     If there is 'no_prologue'/'naked'/'secure' attribute in this function,
      we can set 'naked_p' flag to indicate that
      we do not have to generate prologue/epilogue.
      Or, if all the following conditions succeed,
@@ -510,14 +535,17 @@ nds32_compute_stack_frame (void)
 		    is no outgoing size.
        condition 3: There is no local_size, which means
 		    we do not need to adjust $sp.  */
-  if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
+  if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl))
+      || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl))
+      || lookup_attribute ("secure", DECL_ATTRIBUTES (current_function_decl))
       || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM
 	  && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM
 	  && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM
 	  && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM
 	  && !df_regs_ever_live_p (FP_REGNUM)
 	  && !df_regs_ever_live_p (LP_REGNUM)
-	  && cfun->machine->local_size == 0))
+	  && cfun->machine->local_size == 0
+	  && !flag_pic))
     {
       /* Set this function 'naked_p' and other functions can check this flag.
 	 Note that in nds32 port, the 'naked_p = 1' JUST means there is no
@@ -1259,6 +1287,32 @@ nds32_emit_stack_v3pop (unsigned Rb,
   REG_NOTES (parallel_insn) = dwarf;
 }
 
+static void
+nds32_emit_load_gp (void)
+{
+  rtx got_symbol, pat;
+
+  /* Fence the $gp setup below so it is not scheduled with other insns.  */
+  emit_insn (gen_blockage ());
+
+  got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+  /* sethi $gp, hi20(_GLOBAL_OFFSET_TABLE_ - 8), as a GOTINIT unspec.  */
+  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT);
+  pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-8)));
+  emit_insn (gen_sethi (pic_offset_table_rtx,pat));
+
+  /* ori $gp, $gp, lo12(_GLOBAL_OFFSET_TABLE_ - 4), same unspec wrapper.  */
+  pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT);
+  pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-4)));
+  emit_insn (gen_lo_sum (pic_offset_table_rtx, pic_offset_table_rtx, pat));
+
+  /* add5.pc $gp: $gp is both the input and the result of add_pc.  */
+  emit_insn (gen_add_pc (pic_offset_table_rtx, pic_offset_table_rtx));
+
+  /* Closing fence: keep later insns from being scheduled into the setup.  */
+  emit_insn (gen_blockage ());
+}
+
 /* Function that may creates more instructions
    for large value on adjusting stack pointer.
 
@@ -1342,17 +1396,25 @@ nds32_needs_double_word_align (machine_mode mode, const_tree type)
 }
 
 /* Return true if FUNC is a naked function.  */
-static bool
+bool
 nds32_naked_function_p (tree func)
 {
-  tree t;
+  /* FOR BACKWARD COMPATIBILITY,
+     we need to support 'no_prologue' attribute as well.  */
+  tree t_naked;
+  tree t_no_prologue;
 
   if (TREE_CODE (func) != FUNCTION_DECL)
     abort ();
 
-  t = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
+  /* We have to use lookup_attribute() to check attributes.
+     Because attr_naked_p and attr_no_prologue_p are set in
+     nds32_compute_stack_frame() and the function has not been
+     invoked yet.  */
+  t_naked       = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
+  t_no_prologue = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (func));
 
-  return (t != NULL_TREE);
+  return ((t_naked != NULL_TREE) || (t_no_prologue != NULL_TREE));
 }
 
 /* Function that determine whether a load postincrement is a good thing to use
@@ -1569,6 +1631,11 @@ nds32_register_pass (
 static void
 nds32_register_passes (void)
 {
+  nds32_register_pass (
+    make_pass_nds32_fp_as_gp,
+    PASS_POS_INSERT_BEFORE,
+    "ira");
+
   nds32_register_pass (
     make_pass_nds32_relax_opt,
     PASS_POS_INSERT_AFTER,
@@ -1636,6 +1703,9 @@ nds32_conditional_register_usage (void)
 {
   int regno;
 
+  if (TARGET_LINUX_ABI)
+    fixed_regs[TP_REGNUM] = 1;
+
   if (TARGET_HARD_FLOAT)
     {
       for (regno = NDS32_FIRST_FPR_REGNUM;
@@ -1987,6 +2057,16 @@ nds32_function_arg_boundary (machine_mode mode, const_tree type)
 	  : PARM_BOUNDARY);
 }
 
+/* Return true if MODE is V4QImode or V2HImode and NDS32_EXT_DSP_P ()
+   holds; every other mode is rejected.  */
+bool
+nds32_vector_mode_supported_p (machine_mode mode)
+{
+  if (mode != V4QImode && mode != V2HImode)
+    return false;
+  return NDS32_EXT_DSP_P ();
+}
+
 /* -- How Scalar Function Values Are Returned.  */
 
 static rtx
@@ -2124,56 +2204,12 @@ static void
 nds32_asm_function_end_prologue (FILE *file)
 {
   fprintf (file, "\t! END PROLOGUE\n");
-
-  /* If frame pointer is NOT needed and -mfp-as-gp is issued,
-     we can generate special directive: ".omit_fp_begin"
-     to guide linker doing fp-as-gp optimization.
-     However, for a naked function, which means
-     it should not have prologue/epilogue,
-     using fp-as-gp still requires saving $fp by push/pop behavior and
-     there is no benefit to use fp-as-gp on such small function.
-     So we need to make sure this function is NOT naked as well.  */
-  if (!frame_pointer_needed
-      && !cfun->machine->naked_p
-      && cfun->machine->fp_as_gp_p)
-    {
-      fprintf (file, "\t! ----------------------------------------\n");
-      fprintf (file, "\t! Guide linker to do "
-		     "link time optimization: fp-as-gp\n");
-      fprintf (file, "\t! We add one more instruction to "
-		     "initialize $fp near to $gp location.\n");
-      fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n");
-      fprintf (file, "\t! this extra instruction should be "
-		     "eliminated at link stage.\n");
-      fprintf (file, "\t.omit_fp_begin\n");
-      fprintf (file, "\tla\t$fp,_FP_BASE_\n");
-      fprintf (file, "\t! ----------------------------------------\n");
-    }
 }
 
 /* Before rtl epilogue has been expanded, this function is used.  */
 static void
 nds32_asm_function_begin_epilogue (FILE *file)
 {
-  /* If frame pointer is NOT needed and -mfp-as-gp is issued,
-     we can generate special directive: ".omit_fp_end"
-     to claim fp-as-gp optimization range.
-     However, for a naked function,
-     which means it should not have prologue/epilogue,
-     using fp-as-gp still requires saving $fp by push/pop behavior and
-     there is no benefit to use fp-as-gp on such small function.
-     So we need to make sure this function is NOT naked as well.  */
-  if (!frame_pointer_needed
-      && !cfun->machine->naked_p
-      && cfun->machine->fp_as_gp_p)
-    {
-      fprintf (file, "\t! ----------------------------------------\n");
-      fprintf (file, "\t! Claim the range of fp-as-gp "
-		     "link time optimization\n");
-      fprintf (file, "\t.omit_fp_end\n");
-      fprintf (file, "\t! ----------------------------------------\n");
-    }
-
   fprintf (file, "\t! BEGIN EPILOGUE\n");
 }
 
@@ -2200,6 +2236,26 @@ nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
 		? 1
 		: 0);
 
+  if (flag_pic)
+    {
+      fprintf (file, "\tsmw.adm\t$r31, [$r31], $r31, 4\n");
+      fprintf (file, "\tsethi\t%s, hi20(_GLOBAL_OFFSET_TABLE_-8)\n",
+		      reg_names [PIC_OFFSET_TABLE_REGNUM]);
+      fprintf (file, "\tori\t%s, %s, lo12(_GLOBAL_OFFSET_TABLE_-4)\n",
+		      reg_names [PIC_OFFSET_TABLE_REGNUM],
+		      reg_names [PIC_OFFSET_TABLE_REGNUM]);
+
+      if (TARGET_ISA_V3)
+	fprintf (file, "\tadd5.pc\t$gp\n");
+      else
+	{
+	  fprintf (file, "\tmfusr\t$ta, $pc\n");
+	  fprintf (file, "\tadd\t%s, $ta, %s\n",
+			  reg_names [PIC_OFFSET_TABLE_REGNUM],
+			  reg_names [PIC_OFFSET_TABLE_REGNUM]);
+	}
+    }
+
   if (delta != 0)
     {
       if (satisfies_constraint_Is15 (GEN_INT (delta)))
@@ -2224,9 +2280,23 @@ nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
 	}
     }
 
-  fprintf (file, "\tb\t");
-  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
-  fprintf (file, "\n");
+  if (flag_pic)
+    {
+      fprintf (file, "\tla\t$ta, ");
+      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+      fprintf (file, "@PLT\n");
+      fprintf (file, "\t! epilogue\n");
+      fprintf (file, "\tlwi.bi\t%s, [%s], 4\n",
+	       reg_names[PIC_OFFSET_TABLE_REGNUM],
+	       reg_names[STACK_POINTER_REGNUM]);
+      fprintf (file, "\tbr\t$ta\n");
+    }
+  else
+    {
+      fprintf (file, "\tb\t");
+      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+      fprintf (file, "\n");
+    }
 
   final_end_function ();
 }
@@ -2242,15 +2312,20 @@ nds32_function_ok_for_sibcall (tree decl,
 
   /* 1. Do not apply sibling call if -mv3push is enabled,
 	because pop25 instruction also represents return behavior.
-     2. If this function is a variadic function, do not apply sibling call
+     2. If this function is an isr function, do not apply sibling call
+	because it may perform the behavior that user does not expect.
+     3. If this function is a variadic function, do not apply sibling call
 	because the stack layout may be a mess.
-     3. We don't want to apply sibling call optimization for indirect
+     4. We don't want to apply sibling call optimization for indirect
 	sibcall because the pop behavior in epilogue may pollute the
 	content of caller-saved regsiter when the register is used for
-	indirect sibcall.  */
+	indirect sibcall.
+     5. In pic mode, it may use some registers for PLT call.  */
   return (!TARGET_V3PUSH
+	  && !nds32_isr_function_p (current_function_decl)
 	  && (cfun->machine->va_args_size == 0)
-	  && decl);
+	  && decl
+	  && !flag_pic);
 }
 
 /* Determine whether we need to enable warning for function return check.  */
@@ -2566,6 +2641,13 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
 
     case SYMBOL_REF:
       /* (mem (symbol_ref A)) => [symbol_ref] */
+
+      if (flag_pic || SYMBOL_REF_TLS_MODEL (x))
+	return false;
+
+      if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x))
+	return false;
+
       /* If -mcmodel=large, the 'symbol_ref' is not a valid address
 	 during or after LRA/reload phase.  */
       if (TARGET_CMODEL_LARGE
@@ -2577,7 +2659,8 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
 	 the 'symbol_ref' is not a valid address during or after
 	 LRA/reload phase.  */
       if (TARGET_CMODEL_MEDIUM
-	  && NDS32_SYMBOL_REF_RODATA_P (x)
+	  && (NDS32_SYMBOL_REF_RODATA_P (x)
+	      || CONSTANT_POOL_ADDRESS_P (x))
 	  && (reload_completed
 	      || reload_in_progress
 	      || lra_in_progress))
@@ -2599,6 +2682,10 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
 	    {
 	      /* Now we see the [ + const_addr ] pattern, but we need
 		 some further checking.  */
+
+	      if (flag_pic || SYMBOL_REF_TLS_MODEL (op0))
+		return false;
+
 	      /* If -mcmodel=large, the 'const_addr' is not a valid address
 		 during or after LRA/reload phase.  */
 	      if (TARGET_CMODEL_LARGE
@@ -2675,17 +2762,202 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict)
 
     case LO_SUM:
       /* (mem (lo_sum (reg) (symbol_ref))) */
-      /* (mem (lo_sum (reg) (const))) */
-      gcc_assert (REG_P (XEXP (x, 0)));
-      if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF
-	  || GET_CODE (XEXP (x, 1)) == CONST)
-	return nds32_legitimate_address_p (mode, XEXP (x, 1), strict);
-      else
+      /* (mem (lo_sum (reg) (const (plus (symbol_ref) (reg)))) */
+      /* TLS case: (mem (lo_sum (reg) (const (unspec symbol_ref X)))) */
+      /* The LO_SUM is a valid address if and only if we would like to
+	 generate 32-bit full address memory access with any of following
+	 circumstance:
+	   1. -mcmodel=large.
+	   2. -mcmodel=medium and the symbol_ref references to rodata.  */
+      {
+	rtx sym = NULL_RTX;
+
+	if (flag_pic)
+	  return false;
+
+	if (!REG_P (XEXP (x, 0)))
+	  return false;
+
+	if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+	  sym = XEXP (x, 1);
+	else if (GET_CODE (XEXP (x, 1)) == CONST)
+	  {
+	    rtx plus = XEXP(XEXP (x, 1), 0);
+	    if (GET_CODE (plus) == PLUS)
+	      sym = XEXP (plus, 0);
+	    else if (GET_CODE (plus) == UNSPEC)
+	      sym = XVECEXP (plus, 0, 0);
+	  }
+	else
+	  return false;
+
+	gcc_assert (GET_CODE (sym) == SYMBOL_REF);
+
+	if (TARGET_ICT_MODEL_LARGE
+	    && nds32_indirect_call_referenced_p (sym))
+	  return true;
+
+	if (TARGET_CMODEL_LARGE)
+	  return true;
+	else if (TARGET_CMODEL_MEDIUM
+		 && NDS32_SYMBOL_REF_RODATA_P (sym))
+	  return true;
+	else
+	  return false;
+      }
+
+    default:
+      return false;
+    }
+}
+
+static rtx
+nds32_legitimize_address (rtx x,
+			  rtx oldx ATTRIBUTE_UNUSED,
+			  machine_mode mode ATTRIBUTE_UNUSED)
+{
+  /* Try TLS first, then PIC, then the large indirect-call model.  */
+  if (nds32_tls_referenced_p (x))
+    return nds32_legitimize_tls_address (x);
+  if (flag_pic && SYMBOLIC_CONST_P (x))
+    return nds32_legitimize_pic_address (x);
+  if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x))
+    return nds32_legitimize_ict_address (x);
+  return x;
+}
+
+static bool
+nds32_legitimate_constant_p (machine_mode mode, rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case CONST_DOUBLE:
+      if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+	  && (mode == DFmode || mode == SFmode))
 	return false;
+      break;
+    case CONST:
+      x = XEXP (x, 0);
+
+      if (GET_CODE (x) == PLUS)
+	{
+	  if (!CONST_INT_P (XEXP (x, 1)))
+	    return false;
+	  x = XEXP (x, 0);
+	}
+
+      if (GET_CODE (x) == UNSPEC)
+	{
+	  switch (XINT (x, 1))
+	    {
+	    case UNSPEC_GOT:
+	    case UNSPEC_GOTOFF:
+	    case UNSPEC_PLT:
+	    case UNSPEC_TLSGD:
+	    case UNSPEC_TLSLD:
+	    case UNSPEC_TLSIE:
+	    case UNSPEC_TLSLE:
+	    case UNSPEC_ICT:
+	      return false;
+	    default:
+	      return true;
+	    }
+	}
+      break;
+    case SYMBOL_REF:
+      /* TLS symbols need a call to resolve in
+	 precompute_register_parameters.  */
+      if (SYMBOL_REF_TLS_MODEL (x))
+	return false;
+      break;
+    default:
+      return true;
+    }
+
+  return true;
+}
 
+/* Strip the UNSPEC wrapper from a CONST and return the wrapped operand.  */
+static rtx
+nds32_delegitimize_address (rtx x)
+{
+  x = delegitimize_mem_from_attrs (x);
+
+  if (GET_CODE(x) == CONST)
+    {
+      rtx inner = XEXP (x, 0);
+
+      /* Look through a PLUS, e.g. a GOTOFF reference plus an offset.  */
+      if (GET_CODE (inner) == PLUS)
+	inner = XEXP (inner, 0);
+
+      if (GET_CODE (inner) == UNSPEC)
+	{
+	  switch (XINT (inner, 1))
+	    {
+	    case UNSPEC_GOTINIT:
+	    case UNSPEC_GOT:
+	    case UNSPEC_GOTOFF:
+	    case UNSPEC_PLT:
+	    case UNSPEC_TLSGD:
+	    case UNSPEC_TLSLD:
+	    case UNSPEC_TLSIE:
+	    case UNSPEC_TLSLE:
+	    case UNSPEC_ICT:
+	      x = XVECEXP (inner, 0, 0);
+	      break;
+	    default:
+	      break;
+	    }
+	}
+    }
+  return x;
+}
+
+static machine_mode
+nds32_vectorize_preferred_simd_mode (scalar_mode mode)
+{
+  /* With the DSP extension, use V4QI for byte elements and V2HI for
+     halfword elements.  */
+  if (NDS32_EXT_DSP_P ())
+    {
+      if (mode == E_QImode)
+	return V4QImode;
+      if (mode == E_HImode)
+	return V2HImode;
+    }
+
+  /* No DSP extension, or no vector mode chosen for MODE.  */
+  return word_mode;
+}
+
+static bool
+nds32_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case CONST:
+      return !nds32_legitimate_constant_p (mode, x);
+    case SYMBOL_REF:
+      /* In PIC mode all symbols have to be accessed gp-relative.
+	 For the small and medium code models we also do not want to force
+	 a symbol into the constant pool in the .text section, because
+	 gp-relative instructions are used to load it.  */
+      if (flag_pic
+	  || SYMBOL_REF_TLS_MODEL (x)
+	  || TARGET_CMODEL_SMALL
+	  || TARGET_CMODEL_MEDIUM)
+	return true;
+      break;
+    case CONST_INT:
+    case CONST_DOUBLE:
+      if (flag_pic && (lra_in_progress || reload_completed))
+	return true;
+      break;
     default:
       return false;
     }
+  return false;
 }
 
 
@@ -2731,13 +3003,33 @@ nds32_canonicalize_comparison (int *code,
 /* Describing Relative Costs of Operations.  */
 
 static int
-nds32_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
+nds32_register_move_cost (machine_mode mode,
 			  reg_class_t from,
 			  reg_class_t to)
 {
+  /* On the graywolf cpu, FPR to GPR moves are cheaper than on other cpus.  */
+  if (TARGET_PIPELINE_GRAYWOLF)
+    {
+      if (GET_MODE_SIZE (mode) == 8)
+	{
+	  /* DPR to GPR.  */
+	  if (from == FP_REGS && to != FP_REGS)
+	    return 3;
+	  /* GPR to DPR.  */
+	  if (from != FP_REGS && to == FP_REGS)
+	    return 2;
+	}
+      else
+	{
+	  if ((from == FP_REGS && to != FP_REGS)
+	      || (from != FP_REGS && to == FP_REGS))
+	    return 2;
+	}
+    }
+
   if ((from == FP_REGS && to != FP_REGS)
       || (from != FP_REGS && to == FP_REGS))
-    return 9;
+    return 3;
   else if (from == HIGH_REGS || to == HIGH_REGS)
     return optimize_size ? 6 : 2;
   else
@@ -2825,6 +3117,9 @@ nds32_asm_file_start (void)
 {
   default_file_start ();
 
+  if (flag_pic)
+    fprintf (asm_out_file, "\t.pic\n");
+
   /* Tell assembler which ABI we are using.  */
   fprintf (asm_out_file, "\t! ABI version\n");
   if (TARGET_HARD_FLOAT)
@@ -2835,10 +3130,36 @@ nds32_asm_file_start (void)
   /* Tell assembler that this asm code is generated by compiler.  */
   fprintf (asm_out_file, "\t! This asm file is generated by compiler\n");
   fprintf (asm_out_file, "\t.flag\tverbatim\n");
-  /* Give assembler the size of each vector for interrupt handler.  */
-  fprintf (asm_out_file, "\t! This vector size directive is required "
-			 "for checking inconsistency on interrupt handler\n");
-  fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size);
+
+  /* Insert directive for linker to distinguish object's ict flag.  */
+  if (!TARGET_LINUX_ABI)
+    {
+      if (TARGET_ICT_MODEL_LARGE)
+	fprintf (asm_out_file, "\t.ict_model\tlarge\n");
+      else
+	fprintf (asm_out_file, "\t.ict_model\tsmall\n");
+    }
+
+  /* We need to provide the size of each vector for interrupt handler
+     under elf toolchain.  */
+  if (!TARGET_LINUX_ABI)
+    {
+      fprintf (asm_out_file, "\t! This vector size directive is required "
+			     "for checking inconsistency on interrupt handler\n");
+      fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size);
+    }
+
+  /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os,
+     the compiler may produce 'la $fp,_FP_BASE_' instruction
+     at prologue for fp-as-gp optimization.
+     We should emit weak reference of _FP_BASE_ to avoid undefined reference
+     in case user does not pass '--relax' option to linker.  */
+  if (!TARGET_LINUX_ABI && (TARGET_FORCE_FP_AS_GP || optimize_size))
+    {
+      fprintf (asm_out_file, "\t! This weak reference is required to do "
+			     "fp-as-gp link time optimization\n");
+      fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n");
+    }
 
   fprintf (asm_out_file, "\t! ------------------------------------\n");
 
@@ -2849,6 +3170,49 @@ nds32_asm_file_start (void)
   if (TARGET_ISA_V3M)
     fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3M");
 
+  switch (nds32_cpu_option)
+    {
+    case CPU_N6:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N6");
+      break;
+
+    case CPU_N7:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N7");
+      break;
+
+    case CPU_N8:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N8");
+      break;
+
+    case CPU_E8:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "E8");
+      break;
+
+    case CPU_N9:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N9");
+      break;
+
+    case CPU_N10:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N10");
+      break;
+
+    case CPU_GRAYWOLF:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Graywolf");
+      break;
+
+    case CPU_N12:
+    case CPU_N13:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N13");
+      break;
+
+    case CPU_SIMPLE:
+      fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "SIMPLE");
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
   if (TARGET_CMODEL_SMALL)
     fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "SMALL");
   if (TARGET_CMODEL_MEDIUM)
@@ -2926,9 +3290,65 @@ nds32_asm_file_end (void)
 {
   nds32_asm_file_end_for_isr ();
 
+  /* The NDS32 Linux stack is mapped non-executable by default, so add a
+     .note.GNU-stack section.  */
+  if (TARGET_LINUX_ABI)
+    file_end_indicate_exec_stack ();
+
   fprintf (asm_out_file, "\t! ------------------------------------\n");
 }
 
+static bool
+nds32_asm_output_addr_const_extra (FILE *file, rtx x)
+{
+  /* Relocation suffix appended after the wrapped operand.  */
+  const char *reloc_suffix;
+
+  /* Only UNSPEC expressions are handled here.  */
+  if (GET_CODE (x) != UNSPEC)
+    return false;
+
+  switch (XINT (x, 1))
+    {
+    case UNSPEC_GOTINIT:
+      /* Printed as the bare operand, without any suffix.  */
+      reloc_suffix = NULL;
+      break;
+    case UNSPEC_GOTOFF:
+      reloc_suffix = "@GOTOFF";
+      break;
+    case UNSPEC_GOT:
+      reloc_suffix = "@GOT";
+      break;
+    case UNSPEC_PLT:
+      reloc_suffix = "@PLT";
+      break;
+    case UNSPEC_TLSGD:
+    case UNSPEC_TLSLD:
+      /* TLSGD and TLSLD share the same suffix.  */
+      reloc_suffix = "@TLSDESC";
+      break;
+    case UNSPEC_TLSIE:
+      reloc_suffix = "@GOTTPOFF";
+      break;
+    case UNSPEC_TLSLE:
+      reloc_suffix = "@TPOFF";
+      break;
+    case UNSPEC_ICT:
+      reloc_suffix = "@ICT";
+      break;
+    default:
+      /* Not an UNSPEC handled here.  */
+      return false;
+    }
+
+  /* Emit the first operand of the UNSPEC, then the suffix if any.  */
+  output_addr_const (file, XVECEXP (x, 0, 0));
+  if (reloc_suffix != NULL)
+    fputs (reloc_suffix, file);
+  return true;
+}
+
 /* -- Output and Generation of Labels.  */
 
 static void
@@ -2944,13 +3364,15 @@ nds32_asm_globalize_label (FILE *stream, const char *name)
 static void
 nds32_print_operand (FILE *stream, rtx x, int code)
 {
+  HOST_WIDE_INT op_value = 0;
   HOST_WIDE_INT one_position;
   HOST_WIDE_INT zero_position;
   bool pick_lsb_p = false;
   bool pick_msb_p = false;
   int regno;
 
-  int op_value;
+  if (CONST_INT_P (x))
+    op_value = INTVAL (x);
 
   switch (code)
     {
@@ -2976,6 +3398,18 @@ nds32_print_operand (FILE *stream, rtx x, int code)
 
       /* No need to handle following process, so return immediately.  */
       return;
+
+    case 'v':
+      gcc_assert (CONST_INT_P (x)
+		  && (INTVAL (x) == 0
+		      || INTVAL (x) == 8
+		      || INTVAL (x) == 16
+		      || INTVAL (x) == 24));
+      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
+
+      /* No need to handle following process, so return immediately.  */
+      return;
+
     case 'B':
       /* Use exact_log2() to search the 1-bit position.  */
       gcc_assert (CONST_INT_P (x));
@@ -3003,7 +3437,6 @@ nds32_print_operand (FILE *stream, rtx x, int code)
     case 'V':
       /* 'x' is supposed to be CONST_INT, get the value.  */
       gcc_assert (CONST_INT_P (x));
-      op_value = INTVAL (x);
 
       /* According to the Andes architecture,
 	 the system/user register index range is 0 ~ 1023.
@@ -3083,8 +3516,15 @@ nds32_print_operand (FILE *stream, rtx x, int code)
   switch (GET_CODE (x))
     {
     case LABEL_REF:
+      output_addr_const (stream, x);
+      break;
+
     case SYMBOL_REF:
       output_addr_const (stream, x);
+
+      if (!TARGET_LINUX_ABI && nds32_indirect_call_referenced_p (x))
+	fprintf (stream, "@ICT");
+
       break;
 
     case REG:
@@ -3167,6 +3607,17 @@ nds32_print_operand (FILE *stream, rtx x, int code)
       output_addr_const (stream, x);
       break;
 
+    case CONST_VECTOR:
+      fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x));
+      break;
+
+    case LO_SUM:
+      /* This is a special case for inline assembly using memory address 'p'.
+	 The inline assembly code is expected to use pseudo instruction
+	 for the operand.  EX: la  */
+      output_addr_const (stream, XEXP(x, 1));
+      break;
+
     default:
       /* Generally, output_addr_const () is able to handle most cases.
 	 We want to see what CODE could appear,
@@ -3178,7 +3629,9 @@ nds32_print_operand (FILE *stream, rtx x, int code)
 }
 
 static void
-nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
+nds32_print_operand_address (FILE *stream,
+			     machine_mode mode ATTRIBUTE_UNUSED,
+			     rtx x)
 {
   rtx op0, op1;
 
@@ -3193,6 +3646,16 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
       fputs ("]", stream);
       break;
 
+    case LO_SUM:
+      /* This is a special case for inline assembly using memory operand 'm'.
+	 The inline assembly code is expected to use pseudo instruction
+	 for the operand.  EX: [ls].[bhw]  */
+      fputs ("[ + ", stream);
+      op1 = XEXP (x, 1);
+      output_addr_const (stream, op1);
+      fputs ("]", stream);
+      break;
+
     case REG:
       /* Forbid using static chain register ($r16)
 	 on reduced-set registers configuration.  */
@@ -3259,6 +3722,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
 			   reg_names[REGNO (XEXP (op0, 0))],
 			   sv);
 	}
+      else if (GET_CODE (op0) == ASHIFT && REG_P (op1))
+	{
+	  /* [Ra + Rb << sv]
+	     Normally, ASHIFT can be converted to MULT like the case above.
+	     But when the address rtx does not go through canonicalize_address
+	     defined in fwprop, we'll need this case.  */
+	  int sv = INTVAL (XEXP (op0, 1));
+	  gcc_assert (sv <= 3 && sv >=0);
+
+	  fprintf (stream, "[%s + %s << %d]",
+		   reg_names[REGNO (op1)],
+		   reg_names[REGNO (XEXP (op0, 0))],
+		   sv);
+	}
       else
 	{
 	  /* The control flow is not supposed to be here.  */
@@ -3453,6 +3930,27 @@ nds32_merge_decl_attributes (tree olddecl, tree newdecl)
 static void
 nds32_insert_attributes (tree decl, tree *attributes)
 {
+  /* An "indirect_call" function attribute implies "noinline" and "noclone"
+     for elf toolchain to support ROM patch mechanism.  */
+  if (TREE_CODE (decl) == FUNCTION_DECL
+      && lookup_attribute ("indirect_call", *attributes) != NULL)
+    {
+      tree new_attrs = *attributes;
+
+      if (TARGET_LINUX_ABI)
+	error("cannot use indirect_call attribute under linux toolchain");
+
+      if (lookup_attribute ("noinline", new_attrs) == NULL)
+	new_attrs = tree_cons (get_identifier ("noinline"), NULL, new_attrs);
+      if (lookup_attribute ("noclone", new_attrs) == NULL)
+	new_attrs = tree_cons (get_identifier ("noclone"), NULL, new_attrs);
+
+      if (!TREE_PUBLIC (decl))
+	error("indirect_call attribute can't apply for static function");
+
+      *attributes = new_attrs;
+    }
+
   /* For function declaration, we need to check isr-specific attributes:
        1. Call nds32_check_isr_attrs_conflict() to check any conflict.
        2. Check valid integer value for interrupt/exception.
@@ -3478,6 +3976,38 @@ nds32_insert_attributes (tree decl, tree *attributes)
       excp  = lookup_attribute ("exception", func_attrs);
       reset = lookup_attribute ("reset", func_attrs);
 
+      /* The following code may use attribute arguments.  If there is no
+	 argument from source code, it will cause segmentation fault.
+	 Therefore, return directly and report error message later.  */
+      if ((intr && TREE_VALUE (intr) == NULL)
+	  || (excp && TREE_VALUE (excp) == NULL)
+	  || (reset && TREE_VALUE (reset) == NULL))
+	return;
+
+      /* ------------------------------------------------------------- */
+      /* FIXME:
+	 FOR BACKWARD COMPATIBILITY, we need to support following patterns:
+
+	     __attribute__((interrupt("XXX;YYY;id=ZZZ")))
+	     __attribute__((exception("XXX;YYY;id=ZZZ")))
+	     __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ")))
+
+	 If interrupt/exception/reset appears and its argument is a
+	 STRING_CST, we will use other functions to parse string in the
+	 nds32_construct_isr_vectors_information() and then set necessary
+	 isr information in the nds32_isr_vectors[] array.  Here we can
+	 just return immediately to avoid new-syntax checking.  */
+      if (intr != NULL_TREE
+	  && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST)
+	return;
+      if (excp != NULL_TREE
+	  && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST)
+	return;
+      if (reset != NULL_TREE
+	  && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST)
+	return;
+      /* ------------------------------------------------------------- */
+
       if (intr || excp)
 	{
 	  /* Deal with interrupt/exception.  */
@@ -3597,7 +4127,9 @@ nds32_option_override (void)
     }
   if (TARGET_ISA_V3)
     {
-      /* Under V3 ISA, currently nothing should be strictly set.  */
+      /* If this is ARCH_V3J, we need to enable TARGET_REDUCED_REGS.  */
+      if (nds32_arch_option == ARCH_V3J)
+	target_flags |= MASK_REDUCED_REGS;
     }
   if (TARGET_ISA_V3M)
     {
@@ -3609,6 +4141,9 @@ nds32_option_override (void)
       target_flags &= ~MASK_EXT_PERF2;
       /* Under V3M ISA, we need to strictly disable TARGET_EXT_STRING.  */
       target_flags &= ~MASK_EXT_STRING;
+
+      if (flag_pic)
+	error ("not support -fpic option for v3m toolchain");
     }
 
   /* See if we are using reduced-set registers:
@@ -3626,6 +4161,12 @@ nds32_option_override (void)
 	fixed_regs[r] = call_used_regs[r] = 1;
     }
 
+  /* See if user explicitly would like to use fp-as-gp optimization.
+     If so, we must prevent $fp from being allocated
+     during register allocation.  */
+  if (TARGET_FORCE_FP_AS_GP)
+    fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1;
+
   if (!TARGET_16_BIT)
     {
       /* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH.  */
@@ -3642,9 +4183,7 @@ nds32_option_override (void)
 	       "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'");
     }
 
-  /* Currently, we don't support PIC code generation yet.  */
-  if (flag_pic)
-    sorry ("position-independent code not supported");
+  nds32_init_rtx_costs ();
 
   nds32_register_passes ();
 }
@@ -3658,8 +4197,11 @@ nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
 		     vec<const char *> &constraints ATTRIBUTE_UNUSED,
 		     vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM));
-  SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM);
+  if (!flag_inline_asm_r15)
+    {
+      clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM));
+      SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM);
+    }
   return NULL;
 }
 
@@ -3686,6 +4228,13 @@ nds32_expand_builtin (tree exp,
   return nds32_expand_builtin_impl (exp, target, subtarget, mode, ignore);
 }
 
+/* Implement TARGET_INIT_LIBFUNCS; the sync libfuncs are Linux-ABI only.  */
+static void
+nds32_init_libfuncs (void)
+{
+  if (TARGET_LINUX_ABI)
+    init_sync_libfuncs (UNITS_PER_WORD);
+}
 
 /* ------------------------------------------------------------------------ */
 
@@ -3702,6 +4251,16 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile)
   builtin_define ("__nds32__");
   builtin_define ("__NDS32__");
 
+  /* We need to provide builtin macro to describe the size of
+     each vector for interrupt handler under elf toolchain.  */
+  if (!TARGET_LINUX_ABI)
+    {
+      if (TARGET_ISR_VECTOR_SIZE_4_BYTE)
+	builtin_define ("__NDS32_ISR_VECTOR_SIZE_4__");
+      else
+	builtin_define ("__NDS32_ISR_VECTOR_SIZE_16__");
+    }
+
   if (TARGET_HARD_FLOAT)
     builtin_define ("__NDS32_ABI_2FP_PLUS__");
   else
@@ -3769,6 +4328,8 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile)
     builtin_define ("__NDS32_GP_DIRECT__");
   if (TARGET_VH)
     builtin_define ("__NDS32_VH__");
+  if (NDS32_EXT_DSP_P ())
+    builtin_define ("__NDS32_EXT_DSP__");
 
   if (TARGET_BIG_ENDIAN)
     builtin_define ("__big_endian__");
@@ -4041,6 +4602,10 @@ nds32_expand_prologue (void)
      The result will be in cfun->machine.  */
   nds32_compute_stack_frame ();
 
+  /* Check frame_pointer_needed again in case fp is needed after reload.  */
+  if (frame_pointer_needed)
+    cfun->machine->fp_as_gp_p = false;
+
   /* If this is a variadic function, first we need to push argument
      registers that hold the unnamed argument value.  */
   if (cfun->machine->va_args_size != 0)
@@ -4065,7 +4630,7 @@ nds32_expand_prologue (void)
 
   /* If the function is 'naked',
      we do not have to generate prologue code fragment.  */
-  if (cfun->machine->naked_p)
+  if (cfun->machine->naked_p && !flag_pic)
     return;
 
   /* Get callee_first_regno and callee_last_regno.  */
@@ -4194,9 +4759,15 @@ nds32_expand_prologue (void)
 			       -1 * sp_adjust);
     }
 
-  /* Prevent the instruction scheduler from
-     moving instructions across the boundary.  */
-  emit_insn (gen_blockage ());
+  /* Emit gp setup instructions for -fpic.  */
+  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+    nds32_emit_load_gp ();
+
+  /* If user applies -mno-sched-prolog-epilog option,
+     we need to prevent instructions of function body from being
+     scheduled with stack adjustment in prologue.  */
+  if (!flag_sched_prolog_epilog)
+    emit_insn (gen_blockage ());
 }
 
 /* Function for normal multiple pop epilogue.  */
@@ -4210,9 +4781,11 @@ nds32_expand_epilogue (bool sibcall_p)
      The result will be in cfun->machine.  */
   nds32_compute_stack_frame ();
 
-  /* Prevent the instruction scheduler from
-     moving instructions across the boundary.  */
-  emit_insn (gen_blockage ());
+  /* If user applies -mno-sched-prolog-epilog option,
+     we need to prevent instructions of function body from being
+     scheduled with stack adjustment in epilogue.  */
+  if (!flag_sched_prolog_epilog)
+    emit_insn (gen_blockage ());
 
   /* If the function is 'naked', we do not have to generate
      epilogue code fragment BUT 'ret' instruction.
@@ -4238,7 +4811,16 @@ nds32_expand_epilogue (bool sibcall_p)
       /* Generate return instruction by using 'return_internal' pattern.
 	 Make sure this instruction is after gen_blockage().  */
       if (!sibcall_p)
-	emit_jump_insn (gen_return_internal ());
+	{
+	  /* We need to further check attributes to determine whether
+	     there should be return instruction at epilogue.
+	     If the attribute naked exists but -mno-ret-in-naked-func
+	     is issued, there is NO need to generate return instruction.  */
+	  if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func)
+	    return;
+
+	  emit_jump_insn (gen_return_internal ());
+	}
       return;
     }
 
@@ -4435,9 +5017,13 @@ nds32_expand_prologue_v3push (void)
   if (cfun->machine->callee_saved_gpr_regs_size > 0)
     df_set_regs_ever_live (FP_REGNUM, 1);
 
+  /* Check frame_pointer_needed again in case fp is needed after reload.  */
+  if (frame_pointer_needed)
+    cfun->machine->fp_as_gp_p = false;
+
   /* If the function is 'naked',
      we do not have to generate prologue code fragment.  */
-  if (cfun->machine->naked_p)
+  if (cfun->machine->naked_p && !flag_pic)
     return;
 
   /* Get callee_first_regno and callee_last_regno.  */
@@ -4565,6 +5151,10 @@ nds32_expand_prologue_v3push (void)
 			       -1 * sp_adjust);
     }
 
+  /* Emit gp setup instructions for -fpic.  */
+  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+    nds32_emit_load_gp ();
+
   /* Prevent the instruction scheduler from
      moving instructions across the boundary.  */
   emit_insn (gen_blockage ());
@@ -4590,9 +5180,19 @@ nds32_expand_epilogue_v3pop (bool sibcall_p)
   if (cfun->machine->naked_p)
     {
       /* Generate return instruction by using 'return_internal' pattern.
-	 Make sure this instruction is after gen_blockage().  */
+	 Make sure this instruction is after gen_blockage().
+	 First we need to check this is a function without sibling call.  */
       if (!sibcall_p)
-	emit_jump_insn (gen_return_internal ());
+	{
+	  /* We need to further check attributes to determine whether
+	     there should be return instruction at epilogue.
+	     If the attribute naked exists but -mno-ret-in-naked-func
+	     is issued, there is NO need to generate return instruction.  */
+	  if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func)
+	    return;
+
+	  emit_jump_insn (gen_return_internal ());
+	}
       return;
     }
 
@@ -4756,6 +5356,11 @@ nds32_can_use_return_insn (void)
   if (!reload_completed)
     return 0;
 
+  /* If attribute 'naked' appears but -mno-ret-in-naked-func is used,
+     we cannot use return instruction.  */
+  if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func)
+    return 0;
+
   sp_adjust = cfun->machine->local_size
 	      + cfun->machine->out_args_size
 	      + cfun->machine->callee_saved_area_gpr_padding_bytes
@@ -5009,6 +5614,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 #undef TARGET_FUNCTION_ARG_BOUNDARY
 #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary
 
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p
+
 /* -- How Scalar Function Values Are Returned.  */
 
 #undef TARGET_FUNCTION_VALUE
@@ -5086,6 +5694,21 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p
 
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS nds32_legitimize_address
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P nds32_legitimate_constant_p
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM nds32_cannot_force_const_mem
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS nds32_delegitimize_address
+
 
 /* Anchored Addresses.  */
 
@@ -5146,6 +5769,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nds32_asm_output_addr_const_extra
+
 /* -- Output of Uninitialized Variables.  */
 
 /* -- Output and Generation of Labels.  */
@@ -5215,6 +5841,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 
 /* Emulating TLS.  */
 
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS TARGET_LINUX_ABI
+
 
 /* Defining coprocessor specifics for MIPS targets.  */
 
@@ -5242,6 +5871,8 @@ nds32_use_blocks_for_constant_p (machine_mode mode,
 #undef TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN nds32_expand_builtin
 
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS nds32_init_libfuncs
 
 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P nds32_use_blocks_for_constant_p
diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h
index 29edccdd040..e3ceb632ebd 100644
--- a/gcc/config/nds32/nds32.h
+++ b/gcc/config/nds32/nds32.h
@@ -36,6 +36,16 @@
 #define NDS32_SYMBOL_REF_RODATA_P(x) \
   ((SYMBOL_REF_FLAGS (x) & NDS32_SYMBOL_FLAG_RODATA) != 0)
 
+enum nds32_relax_insn_type
+{
+  RELAX_ORI,
+  RELAX_PLT_ADD,
+  RELAX_TLS_ADD_or_LW,
+  RELAX_TLS_ADD_LW,
+  RELAX_TLS_LW_JRAL,
+  RELAX_DONE
+};
+
 /* Classifies expand result for expand helper function.  */
 enum nds32_expand_result_type
 {
@@ -140,6 +150,9 @@ enum nds32_16bit_address_type
    Check gcc/common/config/nds32/nds32-common.c for the optimizations that
    apply -malways-align.  */
 #define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN)
+
+#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP)
+
 /* Get alignment according to mode or type information.
    When 'type' is nonnull, there is no need to look at 'mode'.  */
 #define NDS32_MODE_TYPE_ALIGN(mode, type) \
@@ -305,6 +318,10 @@ struct GTY(()) machine_function
        2. The rtl lowering and optimization are close to target code.
 	  For this case we need address to be strictly aligned.  */
   int strict_aligned_p;
+
+  /* Record two similar attributes status.  */
+  int attr_naked_p;
+  int attr_no_prologue_p;
 };
 
 /* A C structure that contains the arguments information.  */
@@ -350,7 +367,8 @@ enum nds32_isr_nested_type
 {
   NDS32_NESTED,
   NDS32_NOT_NESTED,
-  NDS32_NESTED_READY
+  NDS32_NESTED_READY,
+  NDS32_CRITICAL
 };
 
 /* Define structure to record isr information.
@@ -378,6 +396,13 @@ struct nds32_isr_info
      unless user specifies attribute to change it.  */
   enum nds32_isr_nested_type nested_type;
 
+  /* Secure isr level.
+     Currently we have 0-3 security level.
+     It should be set to 0 by default.
+     For security processors, this is determined by secure
+     attribute or compiler options.  */
+  unsigned int security_level;
+
   /* Total vectors.
      The total vectors = interrupt + exception numbers + reset.
      It should be set to 0 by default.
@@ -439,7 +464,30 @@ enum nds32_builtins
   NDS32_BUILTIN_FFB,
   NDS32_BUILTIN_FFMISM,
   NDS32_BUILTIN_FLMISM,
-
+  NDS32_BUILTIN_KADDW,
+  NDS32_BUILTIN_KSUBW,
+  NDS32_BUILTIN_KADDH,
+  NDS32_BUILTIN_KSUBH,
+  NDS32_BUILTIN_KDMBB,
+  NDS32_BUILTIN_V_KDMBB,
+  NDS32_BUILTIN_KDMBT,
+  NDS32_BUILTIN_V_KDMBT,
+  NDS32_BUILTIN_KDMTB,
+  NDS32_BUILTIN_V_KDMTB,
+  NDS32_BUILTIN_KDMTT,
+  NDS32_BUILTIN_V_KDMTT,
+  NDS32_BUILTIN_KHMBB,
+  NDS32_BUILTIN_V_KHMBB,
+  NDS32_BUILTIN_KHMBT,
+  NDS32_BUILTIN_V_KHMBT,
+  NDS32_BUILTIN_KHMTB,
+  NDS32_BUILTIN_V_KHMTB,
+  NDS32_BUILTIN_KHMTT,
+  NDS32_BUILTIN_V_KHMTT,
+  NDS32_BUILTIN_KSLRAW,
+  NDS32_BUILTIN_KSLRAW_U,
+  NDS32_BUILTIN_RDOV,
+  NDS32_BUILTIN_CLROV,
   NDS32_BUILTIN_ROTR,
   NDS32_BUILTIN_SVA,
   NDS32_BUILTIN_SVS,
@@ -512,7 +560,295 @@ enum nds32_builtins
   NDS32_BUILTIN_SET_TRIG_LEVEL,
   NDS32_BUILTIN_SET_TRIG_EDGE,
   NDS32_BUILTIN_GET_TRIG_TYPE,
-
+  NDS32_BUILTIN_DSP_BEGIN,
+  NDS32_BUILTIN_ADD16,
+  NDS32_BUILTIN_V_UADD16,
+  NDS32_BUILTIN_V_SADD16,
+  NDS32_BUILTIN_RADD16,
+  NDS32_BUILTIN_V_RADD16,
+  NDS32_BUILTIN_URADD16,
+  NDS32_BUILTIN_V_URADD16,
+  NDS32_BUILTIN_KADD16,
+  NDS32_BUILTIN_V_KADD16,
+  NDS32_BUILTIN_UKADD16,
+  NDS32_BUILTIN_V_UKADD16,
+  NDS32_BUILTIN_SUB16,
+  NDS32_BUILTIN_V_USUB16,
+  NDS32_BUILTIN_V_SSUB16,
+  NDS32_BUILTIN_RSUB16,
+  NDS32_BUILTIN_V_RSUB16,
+  NDS32_BUILTIN_URSUB16,
+  NDS32_BUILTIN_V_URSUB16,
+  NDS32_BUILTIN_KSUB16,
+  NDS32_BUILTIN_V_KSUB16,
+  NDS32_BUILTIN_UKSUB16,
+  NDS32_BUILTIN_V_UKSUB16,
+  NDS32_BUILTIN_CRAS16,
+  NDS32_BUILTIN_V_UCRAS16,
+  NDS32_BUILTIN_V_SCRAS16,
+  NDS32_BUILTIN_RCRAS16,
+  NDS32_BUILTIN_V_RCRAS16,
+  NDS32_BUILTIN_URCRAS16,
+  NDS32_BUILTIN_V_URCRAS16,
+  NDS32_BUILTIN_KCRAS16,
+  NDS32_BUILTIN_V_KCRAS16,
+  NDS32_BUILTIN_UKCRAS16,
+  NDS32_BUILTIN_V_UKCRAS16,
+  NDS32_BUILTIN_CRSA16,
+  NDS32_BUILTIN_V_UCRSA16,
+  NDS32_BUILTIN_V_SCRSA16,
+  NDS32_BUILTIN_RCRSA16,
+  NDS32_BUILTIN_V_RCRSA16,
+  NDS32_BUILTIN_URCRSA16,
+  NDS32_BUILTIN_V_URCRSA16,
+  NDS32_BUILTIN_KCRSA16,
+  NDS32_BUILTIN_V_KCRSA16,
+  NDS32_BUILTIN_UKCRSA16,
+  NDS32_BUILTIN_V_UKCRSA16,
+  NDS32_BUILTIN_ADD8,
+  NDS32_BUILTIN_V_UADD8,
+  NDS32_BUILTIN_V_SADD8,
+  NDS32_BUILTIN_RADD8,
+  NDS32_BUILTIN_V_RADD8,
+  NDS32_BUILTIN_URADD8,
+  NDS32_BUILTIN_V_URADD8,
+  NDS32_BUILTIN_KADD8,
+  NDS32_BUILTIN_V_KADD8,
+  NDS32_BUILTIN_UKADD8,
+  NDS32_BUILTIN_V_UKADD8,
+  NDS32_BUILTIN_SUB8,
+  NDS32_BUILTIN_V_USUB8,
+  NDS32_BUILTIN_V_SSUB8,
+  NDS32_BUILTIN_RSUB8,
+  NDS32_BUILTIN_V_RSUB8,
+  NDS32_BUILTIN_URSUB8,
+  NDS32_BUILTIN_V_URSUB8,
+  NDS32_BUILTIN_KSUB8,
+  NDS32_BUILTIN_V_KSUB8,
+  NDS32_BUILTIN_UKSUB8,
+  NDS32_BUILTIN_V_UKSUB8,
+  NDS32_BUILTIN_SRA16,
+  NDS32_BUILTIN_V_SRA16,
+  NDS32_BUILTIN_SRA16_U,
+  NDS32_BUILTIN_V_SRA16_U,
+  NDS32_BUILTIN_SRL16,
+  NDS32_BUILTIN_V_SRL16,
+  NDS32_BUILTIN_SRL16_U,
+  NDS32_BUILTIN_V_SRL16_U,
+  NDS32_BUILTIN_SLL16,
+  NDS32_BUILTIN_V_SLL16,
+  NDS32_BUILTIN_KSLL16,
+  NDS32_BUILTIN_V_KSLL16,
+  NDS32_BUILTIN_KSLRA16,
+  NDS32_BUILTIN_V_KSLRA16,
+  NDS32_BUILTIN_KSLRA16_U,
+  NDS32_BUILTIN_V_KSLRA16_U,
+  NDS32_BUILTIN_CMPEQ16,
+  NDS32_BUILTIN_V_SCMPEQ16,
+  NDS32_BUILTIN_V_UCMPEQ16,
+  NDS32_BUILTIN_SCMPLT16,
+  NDS32_BUILTIN_V_SCMPLT16,
+  NDS32_BUILTIN_SCMPLE16,
+  NDS32_BUILTIN_V_SCMPLE16,
+  NDS32_BUILTIN_UCMPLT16,
+  NDS32_BUILTIN_V_UCMPLT16,
+  NDS32_BUILTIN_UCMPLE16,
+  NDS32_BUILTIN_V_UCMPLE16,
+  NDS32_BUILTIN_CMPEQ8,
+  NDS32_BUILTIN_V_SCMPEQ8,
+  NDS32_BUILTIN_V_UCMPEQ8,
+  NDS32_BUILTIN_SCMPLT8,
+  NDS32_BUILTIN_V_SCMPLT8,
+  NDS32_BUILTIN_SCMPLE8,
+  NDS32_BUILTIN_V_SCMPLE8,
+  NDS32_BUILTIN_UCMPLT8,
+  NDS32_BUILTIN_V_UCMPLT8,
+  NDS32_BUILTIN_UCMPLE8,
+  NDS32_BUILTIN_V_UCMPLE8,
+  NDS32_BUILTIN_SMIN16,
+  NDS32_BUILTIN_V_SMIN16,
+  NDS32_BUILTIN_UMIN16,
+  NDS32_BUILTIN_V_UMIN16,
+  NDS32_BUILTIN_SMAX16,
+  NDS32_BUILTIN_V_SMAX16,
+  NDS32_BUILTIN_UMAX16,
+  NDS32_BUILTIN_V_UMAX16,
+  NDS32_BUILTIN_SCLIP16,
+  NDS32_BUILTIN_V_SCLIP16,
+  NDS32_BUILTIN_UCLIP16,
+  NDS32_BUILTIN_V_UCLIP16,
+  NDS32_BUILTIN_KHM16,
+  NDS32_BUILTIN_V_KHM16,
+  NDS32_BUILTIN_KHMX16,
+  NDS32_BUILTIN_V_KHMX16,
+  NDS32_BUILTIN_KABS16,
+  NDS32_BUILTIN_V_KABS16,
+  NDS32_BUILTIN_SMIN8,
+  NDS32_BUILTIN_V_SMIN8,
+  NDS32_BUILTIN_UMIN8,
+  NDS32_BUILTIN_V_UMIN8,
+  NDS32_BUILTIN_SMAX8,
+  NDS32_BUILTIN_V_SMAX8,
+  NDS32_BUILTIN_UMAX8,
+  NDS32_BUILTIN_V_UMAX8,
+  NDS32_BUILTIN_KABS8,
+  NDS32_BUILTIN_V_KABS8,
+  NDS32_BUILTIN_SUNPKD810,
+  NDS32_BUILTIN_V_SUNPKD810,
+  NDS32_BUILTIN_SUNPKD820,
+  NDS32_BUILTIN_V_SUNPKD820,
+  NDS32_BUILTIN_SUNPKD830,
+  NDS32_BUILTIN_V_SUNPKD830,
+  NDS32_BUILTIN_SUNPKD831,
+  NDS32_BUILTIN_V_SUNPKD831,
+  NDS32_BUILTIN_ZUNPKD810,
+  NDS32_BUILTIN_V_ZUNPKD810,
+  NDS32_BUILTIN_ZUNPKD820,
+  NDS32_BUILTIN_V_ZUNPKD820,
+  NDS32_BUILTIN_ZUNPKD830,
+  NDS32_BUILTIN_V_ZUNPKD830,
+  NDS32_BUILTIN_ZUNPKD831,
+  NDS32_BUILTIN_V_ZUNPKD831,
+  NDS32_BUILTIN_RADDW,
+  NDS32_BUILTIN_URADDW,
+  NDS32_BUILTIN_RSUBW,
+  NDS32_BUILTIN_URSUBW,
+  NDS32_BUILTIN_SRA_U,
+  NDS32_BUILTIN_KSLL,
+  NDS32_BUILTIN_PKBB16,
+  NDS32_BUILTIN_V_PKBB16,
+  NDS32_BUILTIN_PKBT16,
+  NDS32_BUILTIN_V_PKBT16,
+  NDS32_BUILTIN_PKTB16,
+  NDS32_BUILTIN_V_PKTB16,
+  NDS32_BUILTIN_PKTT16,
+  NDS32_BUILTIN_V_PKTT16,
+  NDS32_BUILTIN_SMMUL,
+  NDS32_BUILTIN_SMMUL_U,
+  NDS32_BUILTIN_KMMAC,
+  NDS32_BUILTIN_KMMAC_U,
+  NDS32_BUILTIN_KMMSB,
+  NDS32_BUILTIN_KMMSB_U,
+  NDS32_BUILTIN_KWMMUL,
+  NDS32_BUILTIN_KWMMUL_U,
+  NDS32_BUILTIN_SMMWB,
+  NDS32_BUILTIN_V_SMMWB,
+  NDS32_BUILTIN_SMMWB_U,
+  NDS32_BUILTIN_V_SMMWB_U,
+  NDS32_BUILTIN_SMMWT,
+  NDS32_BUILTIN_V_SMMWT,
+  NDS32_BUILTIN_SMMWT_U,
+  NDS32_BUILTIN_V_SMMWT_U,
+  NDS32_BUILTIN_KMMAWB,
+  NDS32_BUILTIN_V_KMMAWB,
+  NDS32_BUILTIN_KMMAWB_U,
+  NDS32_BUILTIN_V_KMMAWB_U,
+  NDS32_BUILTIN_KMMAWT,
+  NDS32_BUILTIN_V_KMMAWT,
+  NDS32_BUILTIN_KMMAWT_U,
+  NDS32_BUILTIN_V_KMMAWT_U,
+  NDS32_BUILTIN_SMBB,
+  NDS32_BUILTIN_V_SMBB,
+  NDS32_BUILTIN_SMBT,
+  NDS32_BUILTIN_V_SMBT,
+  NDS32_BUILTIN_SMTT,
+  NDS32_BUILTIN_V_SMTT,
+  NDS32_BUILTIN_KMDA,
+  NDS32_BUILTIN_V_KMDA,
+  NDS32_BUILTIN_KMXDA,
+  NDS32_BUILTIN_V_KMXDA,
+  NDS32_BUILTIN_SMDS,
+  NDS32_BUILTIN_V_SMDS,
+  NDS32_BUILTIN_SMDRS,
+  NDS32_BUILTIN_V_SMDRS,
+  NDS32_BUILTIN_SMXDS,
+  NDS32_BUILTIN_V_SMXDS,
+  NDS32_BUILTIN_KMABB,
+  NDS32_BUILTIN_V_KMABB,
+  NDS32_BUILTIN_KMABT,
+  NDS32_BUILTIN_V_KMABT,
+  NDS32_BUILTIN_KMATT,
+  NDS32_BUILTIN_V_KMATT,
+  NDS32_BUILTIN_KMADA,
+  NDS32_BUILTIN_V_KMADA,
+  NDS32_BUILTIN_KMAXDA,
+  NDS32_BUILTIN_V_KMAXDA,
+  NDS32_BUILTIN_KMADS,
+  NDS32_BUILTIN_V_KMADS,
+  NDS32_BUILTIN_KMADRS,
+  NDS32_BUILTIN_V_KMADRS,
+  NDS32_BUILTIN_KMAXDS,
+  NDS32_BUILTIN_V_KMAXDS,
+  NDS32_BUILTIN_KMSDA,
+  NDS32_BUILTIN_V_KMSDA,
+  NDS32_BUILTIN_KMSXDA,
+  NDS32_BUILTIN_V_KMSXDA,
+  NDS32_BUILTIN_SMAL,
+  NDS32_BUILTIN_V_SMAL,
+  NDS32_BUILTIN_BITREV,
+  NDS32_BUILTIN_WEXT,
+  NDS32_BUILTIN_BPICK,
+  NDS32_BUILTIN_INSB,
+  NDS32_BUILTIN_SADD64,
+  NDS32_BUILTIN_UADD64,
+  NDS32_BUILTIN_RADD64,
+  NDS32_BUILTIN_URADD64,
+  NDS32_BUILTIN_KADD64,
+  NDS32_BUILTIN_UKADD64,
+  NDS32_BUILTIN_SSUB64,
+  NDS32_BUILTIN_USUB64,
+  NDS32_BUILTIN_RSUB64,
+  NDS32_BUILTIN_URSUB64,
+  NDS32_BUILTIN_KSUB64,
+  NDS32_BUILTIN_UKSUB64,
+  NDS32_BUILTIN_SMAR64,
+  NDS32_BUILTIN_SMSR64,
+  NDS32_BUILTIN_UMAR64,
+  NDS32_BUILTIN_UMSR64,
+  NDS32_BUILTIN_KMAR64,
+  NDS32_BUILTIN_KMSR64,
+  NDS32_BUILTIN_UKMAR64,
+  NDS32_BUILTIN_UKMSR64,
+  NDS32_BUILTIN_SMALBB,
+  NDS32_BUILTIN_V_SMALBB,
+  NDS32_BUILTIN_SMALBT,
+  NDS32_BUILTIN_V_SMALBT,
+  NDS32_BUILTIN_SMALTT,
+  NDS32_BUILTIN_V_SMALTT,
+  NDS32_BUILTIN_SMALDA,
+  NDS32_BUILTIN_V_SMALDA,
+  NDS32_BUILTIN_SMALXDA,
+  NDS32_BUILTIN_V_SMALXDA,
+  NDS32_BUILTIN_SMALDS,
+  NDS32_BUILTIN_V_SMALDS,
+  NDS32_BUILTIN_SMALDRS,
+  NDS32_BUILTIN_V_SMALDRS,
+  NDS32_BUILTIN_SMALXDS,
+  NDS32_BUILTIN_V_SMALXDS,
+  NDS32_BUILTIN_SMUL16,
+  NDS32_BUILTIN_V_SMUL16,
+  NDS32_BUILTIN_SMULX16,
+  NDS32_BUILTIN_V_SMULX16,
+  NDS32_BUILTIN_UMUL16,
+  NDS32_BUILTIN_V_UMUL16,
+  NDS32_BUILTIN_UMULX16,
+  NDS32_BUILTIN_V_UMULX16,
+  NDS32_BUILTIN_SMSLDA,
+  NDS32_BUILTIN_V_SMSLDA,
+  NDS32_BUILTIN_SMSLXDA,
+  NDS32_BUILTIN_V_SMSLXDA,
+  NDS32_BUILTIN_UCLIP32,
+  NDS32_BUILTIN_SCLIP32,
+  NDS32_BUILTIN_KABS,
+  NDS32_BUILTIN_UALOAD_U16,
+  NDS32_BUILTIN_UALOAD_S16,
+  NDS32_BUILTIN_UALOAD_U8,
+  NDS32_BUILTIN_UALOAD_S8,
+  NDS32_BUILTIN_UASTORE_U16,
+  NDS32_BUILTIN_UASTORE_S16,
+  NDS32_BUILTIN_UASTORE_U8,
+  NDS32_BUILTIN_UASTORE_S8,
+  NDS32_BUILTIN_DSP_END,
   NDS32_BUILTIN_UNALIGNED_FEATURE,
   NDS32_BUILTIN_ENABLE_UNALIGNED,
   NDS32_BUILTIN_DISABLE_UNALIGNED,
@@ -521,16 +857,30 @@ enum nds32_builtins
 
 /* ------------------------------------------------------------------------ */
 
-#define TARGET_ISA_V2   (nds32_arch_option == ARCH_V2)
+#define TARGET_ISR_VECTOR_SIZE_4_BYTE \
+  (nds32_isr_vector_size == 4)
 
+#define TARGET_ISA_V2   (nds32_arch_option == ARCH_V2)
 #define TARGET_ISA_V3 \
   (nds32_arch_option == ARCH_V3 \
+   || nds32_arch_option == ARCH_V3J \
    || nds32_arch_option == ARCH_V3F \
    || nds32_arch_option == ARCH_V3S)
 #define TARGET_ISA_V3M  (nds32_arch_option == ARCH_V3M)
 
+#define TARGET_PIPELINE_N7 \
+  (nds32_cpu_option == CPU_N7)
+#define TARGET_PIPELINE_N8 \
+  (nds32_cpu_option == CPU_N6 \
+   || nds32_cpu_option == CPU_N8)
 #define TARGET_PIPELINE_N9 \
   (nds32_cpu_option == CPU_N9)
+#define TARGET_PIPELINE_N10 \
+  (nds32_cpu_option == CPU_N10)
+#define TARGET_PIPELINE_N13 \
+  (nds32_cpu_option == CPU_N12 || nds32_cpu_option == CPU_N13)
+#define TARGET_PIPELINE_GRAYWOLF \
+  (nds32_cpu_option == CPU_GRAYWOLF)
 #define TARGET_PIPELINE_SIMPLE \
   (nds32_cpu_option == CPU_SIMPLE)
 
@@ -541,6 +891,12 @@ enum nds32_builtins
 #define TARGET_CMODEL_LARGE \
    (nds32_cmodel_option == CMODEL_LARGE)
 
+#define TARGET_ICT_MODEL_SMALL \
+   (nds32_ict_model == ICT_MODEL_SMALL)
+
+#define TARGET_ICT_MODEL_LARGE \
+   (nds32_ict_model == ICT_MODEL_LARGE)
+
 /* When -mcmodel=small or -mcmodel=medium,
    compiler may generate gp-base instruction directly.  */
 #define TARGET_GP_DIRECT \
@@ -576,6 +932,21 @@ enum nds32_builtins
 #endif
 
 #define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2
+
+/* ------------------------------------------------------------------------ */
+
+#ifdef TARGET_DEFAULT_RELAX
+#  define NDS32_RELAX_SPEC " %{!mno-relax:--relax}"
+#else
+#  define NDS32_RELAX_SPEC " %{mrelax:--relax}"
+#endif
+
+#ifdef TARGET_DEFAULT_EXT_DSP
+#  define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}"
+#else
+#  define NDS32_EXT_DSP_SPEC ""
+#endif
+
 /* ------------------------------------------------------------------------ */
 
 /* Controlling the Compilation Driver.  */
@@ -591,11 +962,15 @@ enum nds32_builtins
   {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }
 
 #define CC1_SPEC \
-  ""
+  NDS32_EXT_DSP_SPEC
 
 #define ASM_SPEC \
   " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
   " %{march=*:-march=%*}" \
+  " %{mno-16-bit|mno-16bit:-mno-16bit-ext}" \
+  " %{march=v3m:%{!mfull-regs:%{!mreduced-regs:-mreduced-regs}}}" \
+  " %{mfull-regs:-mno-reduced-regs}" \
+  " %{mreduced-regs:-mreduced-regs}" \
   " %{mabi=*:-mabi=v%*}" \
   " %{mconfig-fpu=*:-mfpu-freg=%*}" \
   " %{mext-fpu-mac:-mmac}" \
@@ -603,35 +978,9 @@ enum nds32_builtins
   " %{mext-fpu-sp:-mfpu-sp-ext}" \
   " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \
   " %{mext-fpu-dp:-mfpu-dp-ext}" \
-  " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}"
-
-/* If user issues -mrelax, we need to pass '--relax' to linker.  */
-#define LINK_SPEC \
-  " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
-  " %{mrelax:--relax}"
-
-#define LIB_SPEC \
-  " -lc -lgloss"
-
-/* The option -mno-ctor-dtor can disable constructor/destructor feature
-   by applying different crt stuff.  In the convention, crt0.o is the
-   startup file without constructor/destructor;
-   crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the
-   startup files with constructor/destructor.
-   Note that crt0.o, crt1.o, crti.o, and crtn.o are provided
-   by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are
-   currently provided by GCC for nds32 target.
-
-   For nds32 target so far:
-   If -mno-ctor-dtor, we are going to link
-   "crt0.o [user objects]".
-   If general cases, we are going to link
-   "crt1.o crtbegin1.o [user objects] crtend1.o".  */
-#define STARTFILE_SPEC \
-  " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \
-  " %{!mno-ctor-dtor:crtbegin1.o%s}"
-#define ENDFILE_SPEC \
-  " %{!mno-ctor-dtor:crtend1.o%s}"
+  " %{mno-ext-fpu-dp:-mno-fpu-dp-ext}" \
+  " %{mext-dsp:-mdsp-ext}" \
+  " %{O|O1|O2|O3|Ofast:-O1;:-Os}"
 
 /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we
    configure gcc with --target=nds32be-* setting.
@@ -642,9 +991,11 @@ enum nds32_builtins
 #  define NDS32_ENDIAN_DEFAULT "mlittle-endian"
 #endif
 
-/* Currently we only have elf toolchain,
-   where -mcmodel=medium is always the default.  */
-#define NDS32_CMODEL_DEFAULT "mcmodel=medium"
+#if TARGET_ELF
+#  define NDS32_CMODEL_DEFAULT "mcmodel=medium"
+#else
+#  define NDS32_CMODEL_DEFAULT "mcmodel=large"
+#endif
 
 #define MULTILIB_DEFAULTS \
   { NDS32_ENDIAN_DEFAULT, NDS32_CMODEL_DEFAULT }
@@ -1139,12 +1490,17 @@ enum reg_class
 
 #define PIC_OFFSET_TABLE_REGNUM GP_REGNUM
 
+#define SYMBOLIC_CONST_P(X)	\
+(GET_CODE (X) == SYMBOL_REF						\
+ || GET_CODE (X) == LABEL_REF						\
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
 
 /* Defining the Output Assembler Language.  */
 
 #define ASM_COMMENT_START "!"
 
-#define ASM_APP_ON "! #APP"
+#define ASM_APP_ON "! #APP\n"
 
 #define ASM_APP_OFF "! #NO_APP\n"
 
diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md
index 3b8107e8fbf..f5349d7cc76 100644
--- a/gcc/config/nds32/nds32.md
+++ b/gcc/config/nds32/nds32.md
@@ -56,24 +56,29 @@
 ;; ------------------------------------------------------------------------
 
 ;; CPU pipeline model.
-(define_attr "pipeline_model" "n7,n8,e8,n9,simple"
+(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,simple"
   (const
     (cond [(match_test "nds32_cpu_option == CPU_N7")  (const_string "n7")
 	   (match_test "nds32_cpu_option == CPU_E8")  (const_string "e8")
 	   (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8")  (const_string "n8")
 	   (match_test "nds32_cpu_option == CPU_N9")  (const_string "n9")
+	   (match_test "nds32_cpu_option == CPU_N10") (const_string "n10")
+	   (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf")
+	   (match_test "nds32_cpu_option == CPU_N12") (const_string "n13")
+	   (match_test "nds32_cpu_option == CPU_N13") (const_string "n13")
 	   (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")]
 	  (const_string "n9"))))
 
 ;; Insn type, it is used to default other attribute values.
 (define_attr "type"
   "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\
-   falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore"
+   falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\
+   dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext"
   (const_string "unknown"))
 
 ;; Insn sub-type
 (define_attr "subtype"
-  "simple,shift"
+  "simple,shift,saturation"
   (const_string "simple"))
 
 ;; Length, in bytes, default is 4-bytes.
@@ -133,6 +138,7 @@
 
 ;; ----------------------------------------------------------------------------
 
+(include "nds32-dspext.md")
 
 ;; Move instructions.
 
@@ -209,6 +215,27 @@
 						  low12_int));
       DONE;
     }
+
+  if ((REG_P (operands[0]) || GET_CODE (operands[0]) == SUBREG)
+       && SYMBOLIC_CONST_P (operands[1]))
+    {
+      if (TARGET_ICT_MODEL_LARGE
+	  && nds32_indirect_call_referenced_p (operands[1]))
+	{
+	  nds32_expand_ict_move (operands);
+	  DONE;
+	}
+      else if (nds32_tls_referenced_p (operands [1]))
+	{
+	  nds32_expand_tls_move (operands);
+	  DONE;
+	}
+      else if (flag_pic)
+	{
+	  nds32_expand_pic_move (operands);
+	  DONE;
+	}
+    }
 })
 
 (define_insn "*mov<mode>"
@@ -271,8 +298,8 @@
 ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
 ;; are able to match such instruction template.
 (define_insn "move_addr"
-  [(set (match_operand:SI 0 "register_operand"       "=l, r")
-	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  [(set (match_operand:SI 0 "nds32_general_register_operand"   "=l, r")
+	(match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))]
   ""
   "la\t%0, %1"
   [(set_attr "type"  "alu")
@@ -351,13 +378,58 @@
 
 
 ;; ----------------------------------------------------------------------------
+(define_expand "extv"
+  [(set (match_operand 0 "register_operand" "")
+        (sign_extract (match_operand 1 "nonimmediate_operand" "")
+                      (match_operand 2 "const_int_operand" "")
+                      (match_operand 3 "const_int_operand" "")))]
+  ""
+{
+  enum nds32_expand_result_type result = nds32_expand_extv (operands);
+  switch (result)
+    {
+    case EXPAND_DONE:
+      DONE;
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:
+      break;
+    default:
+      gcc_unreachable ();
+    }
+})
+
+(define_expand "insv"
+  [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+                      (match_operand 1 "const_int_operand" "")
+                      (match_operand 2 "const_int_operand" ""))
+        (match_operand 3 "register_operand" ""))]
+  ""
+{
+  enum nds32_expand_result_type result = nds32_expand_insv (operands);
+  switch (result)
+    {
+    case EXPAND_DONE:
+      DONE;
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:
+      break;
+    default:
+      gcc_unreachable ();
+    }
+})
 
 ;; Arithmetic instructions.
 
 (define_insn "addsi3"
   [(set (match_operand:SI 0 "register_operand"               "=   d,   l,   d,   l, d, l,   k,   l,    r, r")
 	(plus:SI (match_operand:SI 1 "register_operand"      "%   0,   l,   0,   l, 0, l,   0,   k,    r, r")
-		 (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,Iu06, Is15, r")))]
+		 (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))]
   ""
 {
   switch (which_alternative)
@@ -1428,11 +1500,30 @@
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
   ""
-  ""
+  {
+    rtx insn;
+    rtx sym = XEXP (operands[0], 0);
+
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[0] = gen_const_mem (Pmode, reg);
+      }
+
+    if (flag_pic)
+      {
+	insn = emit_call_insn (gen_call_internal
+			       (XEXP (operands[0], 0), GEN_INT (0)));
+	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+	DONE;
+      }
+  }
 )
 
 (define_insn "call_internal"
-  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i"))
+  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S"))
 		    (match_operand 1))
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
@@ -1474,9 +1565,11 @@
 		     (const_int 2)
 		     (const_int 4))
        ;; Alternative 1
-       (if_then_else (match_test "nds32_long_call_p (operands[0])")
-		     (const_int 12)
-		     (const_int 4))
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[0])")
+				   (const_int 12)
+				   (const_int 4)))
      ])]
 )
 
@@ -1492,11 +1585,33 @@
 		         (match_operand 2)))
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
-  "")
+  ""
+  {
+    rtx insn;
+    rtx sym = XEXP (operands[1], 0);
+
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[1] = gen_const_mem (Pmode, reg);
+      }
+
+    if (flag_pic)
+      {
+	insn =
+	  emit_call_insn (gen_call_value_internal
+			  (operands[0], XEXP (operands[1], 0), GEN_INT (0)));
+	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+	DONE;
+      }
+  }
+)
 
 (define_insn "call_value_internal"
   [(parallel [(set (match_operand 0)
-		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i"))
+		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S"))
 		         (match_operand 2)))
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
@@ -1538,9 +1653,11 @@
 		     (const_int 2)
 		     (const_int 4))
        ;; Alternative 1
-       (if_then_else (match_test "nds32_long_call_p (operands[1])")
-		     (const_int 12)
-		     (const_int 4))
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[1])")
+				   (const_int 12)
+				   (const_int 4)))
      ])]
 )
 
@@ -1583,10 +1700,21 @@
 		    (const_int 0))
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
-  "")
+  ""
+{
+    rtx sym = XEXP (operands[0], 0);
+
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[0] = gen_const_mem (Pmode, reg);
+      }
+})
 
 (define_insn "sibcall_internal"
-  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i"))
+  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S"))
 		    (match_operand 1))
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
@@ -1617,9 +1745,11 @@
 		     (const_int 2)
 		     (const_int 4))
        ;; Alternative 1
-       (if_then_else (match_test "nds32_long_call_p (operands[0])")
-		     (const_int 12)
-		     (const_int 4))
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[0])")
+				   (const_int 12)
+				   (const_int 4)))
      ])]
 )
 
@@ -1633,11 +1763,22 @@
 			 (const_int 0)))
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
-  "")
+  ""
+{
+    rtx sym = XEXP (operands[1], 0);
+
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[1] = gen_const_mem (Pmode, reg);
+      }
+})
 
 (define_insn "sibcall_value_internal"
   [(parallel [(set (match_operand 0)
-		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i"))
+		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S"))
 			 (match_operand 2)))
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
@@ -1668,9 +1809,11 @@
 		     (const_int 2)
 		     (const_int 4))
        ;; Alternative 1
-       (if_then_else (match_test "nds32_long_call_p (operands[1])")
-		     (const_int 12)
-		     (const_int 4))
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[1])")
+				   (const_int 12)
+				   (const_int 4)))
      ])]
 )
 
@@ -1687,12 +1830,33 @@
     nds32_expand_prologue_v3push ();
   else
     nds32_expand_prologue ();
+
+  /* If cfun->machine->fp_as_gp_p is true, we can generate special
+     directive to guide linker doing fp-as-gp optimization.
+     However, for a naked function, which means
+     it should not have prologue/epilogue,
+     using fp-as-gp still requires saving $fp by push/pop behavior and
+     there is no benefit to use fp-as-gp on such small function.
+     So we need to make sure this function is NOT naked as well.  */
+  if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
+    emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM)));
+
   DONE;
 })
 
 (define_expand "epilogue" [(const_int 0)]
   ""
 {
+  /* If cfun->machine->fp_as_gp_p is true, we can generate special
+     directive to guide linker doing fp-as-gp optimization.
+     However, for a naked function, which means
+     it should not have prologue/epilogue,
+     using fp-as-gp still requires saving $fp by push/pop behavior and
+     there is no benefit to use fp-as-gp on such small function.
+     So we need to make sure this function is NOT naked as well.  */
+  if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
+    emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM)));
+
   /* Note that only under V3/V3M ISA, we could use v3pop epilogue.
      In addition, we need to check if v3push is indeed available.  */
   if (NDS32_V3PUSH_AVAILABLE_P)
@@ -1792,7 +1956,8 @@
   "nds32_can_use_return_insn ()"
 {
   /* Emit as the simple return.  */
-  if (cfun->machine->naked_p
+  if (!cfun->machine->fp_as_gp_p
+      && cfun->machine->naked_p
       && (cfun->machine->va_args_size == 0))
     {
       emit_jump_insn (gen_return_internal ());
@@ -1802,9 +1967,14 @@
 
 ;; This pattern is expanded only by the shrink-wrapping optimization
 ;; on paths where the function prologue has not been executed.
+;; However, such optimization may reorder the prologue/epilogue blocks
+;; together with basic blocks within function body.
+;; So we must disable this pattern if we have already decided
+;; to perform fp_as_gp optimization, which requires the prologue to be
+;; the first block and the epilogue to be the last block.
 (define_expand "simple_return"
   [(simple_return)]
-  ""
+  "!cfun->machine->fp_as_gp_p"
   ""
 )
 
@@ -1823,6 +1993,9 @@
   [(simple_return)]
   ""
 {
+  if (nds32_isr_function_critical_p (current_function_decl))
+    return "iret";
+
   if (TARGET_16_BIT)
     return "ret5";
   else
@@ -1831,9 +2004,11 @@
   [(set_attr "type" "branch")
    (set_attr "enabled" "yes")
    (set (attr "length")
-	(if_then_else (match_test "TARGET_16_BIT")
-		      (const_int 2)
-		      (const_int 4)))])
+	(if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)")
+		      (const_int 4)
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))))])
 
 
 ;; ----------------------------------------------------------------------------
@@ -1868,6 +2043,7 @@
 {
   rtx add_tmp;
   rtx reg, test;
+  rtx tmp_reg;
 
   /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
   if (operands[1] != const0_rtx)
@@ -1889,9 +2065,14 @@
   emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
 				  operands[4]));
 
-  /* Step C, D, E, and F, using another temporary register.  */
-  rtx tmp = gen_reg_rtx (SImode);
-  emit_jump_insn (gen_casesi_internal (operands[0], operands[3], tmp));
+  tmp_reg = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register tmp_reg.  */
+  if (flag_pic)
+    emit_use (pic_offset_table_rtx);
+
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       tmp_reg));
   DONE;
 })
 
@@ -1927,13 +2108,30 @@
   else
     return nds32_output_casesi (operands);
 }
-  [(set_attr "length" "20")
-   (set_attr "type" "branch")])
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else (match_test "flag_pic")
+		      (const_int 28)
+		      (const_int 20)))])
 
 ;; ----------------------------------------------------------------------------
 
 ;; Performance Extension
 
+; If the -fwrapv option is given, GCC expects signed overflow to wrap
+; around, so ABS(INT_MIN) must still be INT_MIN
+; (e.g. ABS(0x80000000)=0x80000000).
+; However, the hardware ABS instruction of the nds32 target
+; always performs saturation: abs 0x80000000 -> 0x7fffffff.
+; So we can only enable the abssi2 pattern when flag_wrapv is NOT set.
+(define_insn "abssi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(abs:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv"
+  "abs\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
 (define_insn "clzsi2"
   [(set (match_operand:SI 0 "register_operand"         "=r")
 	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
@@ -1996,6 +2194,25 @@
   [(set_attr "length" "0")]
 )
 
+;; Output .omit_fp_begin for fp-as-gp optimization.
+;; Also we have to set $fp register.
+(define_insn "omit_fp_begin"
+  [(set (match_operand:SI 0 "register_operand" "=x")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))]
+  ""
+  "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----"
+  [(set_attr "length" "8")]
+)
+
+;; Output .omit_fp_end for fp-as-gp optimization.
+;; Claim that we have to use $fp register.
+(define_insn "omit_fp_end"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)]
+  ""
+  "! -----\;.omit_fp_end\;! -----"
+  [(set_attr "length" "0")]
+)
+
 (define_insn "pop25return"
   [(return)
    (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)]
@@ -2004,6 +2221,36 @@
   [(set_attr "length" "0")]
 )
 
+;; Add pc
+(define_insn "add_pc"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(plus:SI (match_operand:SI 1 "register_operand"  "0")
+		 (pc)))]
+  "TARGET_LINUX_ABI || flag_pic"
+  "add5.pc\t%0"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+(define_expand "bswapsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+  ""
+{
+  emit_insn (gen_unspec_wsbh (operands[0], operands[1]));
+  emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16)));
+  DONE;
+})
+
+(define_insn "bswaphi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+  ""
+  "wsbh\t%0, %1"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
 ;; ----------------------------------------------------------------------------
 
 ;; Patterns for exception handling
@@ -2068,3 +2315,57 @@
 })
 
 ;; ----------------------------------------------------------------------------
+
+;; Patterns for TLS.
+;; The following two tls patterns are not expanded directly because the
+;; intermediate value may be spilled onto the stack, which would make it
+;; hard to analyze the define-use chain in the relax_opt pass.
+
+
+;; There is an unspec operand to record the RELAX_GROUP number because each
+;; emitted instruction needs a relax_hint above it.
+(define_insn "tls_desc"
+  [(set (reg:SI 0)
+	(call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC)
+	      (const_int 1)))
+   (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
+   (use (reg:SI GP_REGNUM))
+   (clobber (reg:SI LP_REGNUM))
+   (clobber (reg:SI TA_REGNUM))]
+  ""
+  {
+    return nds32_output_tls_desc (operands);
+  }
+  [(set_attr "length" "20")
+   (set_attr "type" "branch")]
+)
+
+;; There is an unspec operand to record the RELAX_GROUP number because each
+;; emitted instruction needs a relax_hint above it.
+(define_insn "tls_ie"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE))
+   (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
+   (use (reg:SI GP_REGNUM))]
+  ""
+  {
+    return nds32_output_tls_ie (operands);
+  }
+  [(set (attr "length") (if_then_else (match_test "flag_pic")
+				      (const_int 12)
+				      (const_int 8)))
+   (set_attr "type" "misc")]
+)
+
+;; This pattern is for relaxation groups that must keep addsi3 in 32-bit mode.
+(define_insn "addsi3_32bit"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "%r")
+		    (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))]
+  ""
+  "add\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+;; ----------------------------------------------------------------------------
diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt
index dcf6d396bc3..0e50c991aba 100644
--- a/gcc/config/nds32/nds32.opt
+++ b/gcc/config/nds32/nds32.opt
@@ -32,6 +32,13 @@ EL
 Target RejectNegative Alias(mlittle-endian)
 Generate code in little-endian mode.
 
+mfp-as-gp
+Target RejectNegative Alias(mforce-fp-as-gp)
+Force performing fp-as-gp optimization.
+
+mno-fp-as-gp
+Target RejectNegative Alias(mforbid-fp-as-gp)
+Forbid performing fp-as-gp optimization.
 
 ; ---------------------------------------------------------------
 
@@ -85,11 +92,36 @@ mlittle-endian
 Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN)
 Generate code in little-endian mode.
 
+mforce-fp-as-gp
+Target Undocumented Mask(FORCE_FP_AS_GP)
+Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization.
+
+mforbid-fp-as-gp
+Target Undocumented Mask(FORBID_FP_AS_GP)
+Forbid using $fp to access static and global variables.  This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'.
+
+mict-model=
+Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL)
+Specify the address generation strategy for ICT call's code model.
+
+Enum
+Name(nds32_ict_model_type) Type(enum nds32_ict_model_type)
+Known ICT model types (for use with the -mict-model= option):
+
+EnumValue
+Enum(nds32_ict_model_type) String(small) Value(ICT_MODEL_SMALL)
+
+EnumValue
+Enum(nds32_ict_model_type) String(large) Value(ICT_MODEL_LARGE)
 
 mcmov
 Target Report Mask(CMOV)
 Generate conditional move instructions.
 
+mhw-abs
+Target Report Mask(HW_ABS)
+Generate hardware abs instructions.
+
 mext-perf
 Target Report Mask(EXT_PERF)
 Generate performance extension instructions.
@@ -102,6 +134,10 @@ mext-string
 Target Report Mask(EXT_STRING)
 Generate string extension instructions.
 
+mext-dsp
+Target Report Mask(EXT_DSP)
+Generate DSP extension instructions.
+
 mv3push
 Target Report Mask(V3PUSH)
 Generate v3 push25/pop25 instructions.
@@ -115,13 +151,17 @@ Target Report Mask(RELAX_HINT)
 Insert relax hint for linker to do relaxation.
 
 mvh
-Target Report Mask(VH)
+Target Report Mask(VH) Condition(!TARGET_LINUX_ABI)
 Enable Virtual Hosting support.
 
 misr-vector-size=
 Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE)
 Specify the size of each interrupt vector, which must be 4 or 16.
 
+misr-secure=
+Target RejectNegative Joined UInteger Var(nds32_isr_secure_level) Init(0)
+Specify the security level of c-isr for the whole file.
+
 mcache-block-size=
 Target RejectNegative Joined UInteger Var(nds32_cache_block_size) Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE)
 Specify the size of each cache block, which must be a power of 2 between 4 and 512.
@@ -140,6 +180,9 @@ Enum(nds32_arch_type) String(v2) Value(ARCH_V2)
 EnumValue
 Enum(nds32_arch_type) String(v3) Value(ARCH_V3)
 
+EnumValue
+Enum(nds32_arch_type) String(v3j) Value(ARCH_V3J)
+
 EnumValue
 Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M)
 
@@ -149,23 +192,6 @@ Enum(nds32_arch_type) String(v3f) Value(ARCH_V3F)
 EnumValue
 Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S)
 
-mcmodel=
-Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE)
-Specify the address generation strategy for code model.
-
-Enum
-Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
-Known cmodel types (for use with the -mcmodel= option):
-
-EnumValue
-Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
-
-EnumValue
-Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
-
-EnumValue
-Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
-
 mcpu=
 Target RejectNegative Joined Enum(nds32_cpu_type) Var(nds32_cpu_option) Init(CPU_N9)
 Specify the cpu for pipeline model.
@@ -234,6 +260,99 @@ Enum(nds32_cpu_type) String(n968) Value(CPU_N9)
 EnumValue
 Enum(nds32_cpu_type) String(n968a) Value(CPU_N9)
 
+EnumValue
+Enum(nds32_cpu_type) String(n10) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1033) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1068) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(d10) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(d1088) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10)
+
+EnumValue
+Enum(nds32_cpu_type) Undocumented String(graywolf) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(n15) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(d15) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(n15s) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(d15s) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(n15f) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(d15f) Value(CPU_GRAYWOLF)
+
+EnumValue
+Enum(nds32_cpu_type) String(n12) Value(CPU_N12)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1213) Value(CPU_N12)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1233) Value(CPU_N12)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12)
+
+EnumValue
+Enum(nds32_cpu_type) String(n13) Value(CPU_N13)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1337) Value(CPU_N13)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13)
+
+EnumValue
+Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13)
+
 EnumValue
 Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE)
 
@@ -321,6 +440,18 @@ mext-fpu-dp
 Target Report Mask(FPU_DOUBLE)
 Generate double-precision floating-point instructions.
 
+mforce-no-ext-dsp
+Target Undocumented Report Mask(FORCE_NO_EXT_DSP)
+Force disable DSP extension instructions, even if -mext-dsp is used.
+
+msched-prolog-epilog
+Target Var(flag_sched_prolog_epilog) Init(0)
+Permit scheduling of a function's prologue and epilogue sequence.
+
+mret-in-naked-func
+Target Var(flag_ret_in_naked_func) Init(1)
+Generate return instruction in naked function.
+
 malways-save-lp
 Target Var(flag_always_save_lp) Init(0)
 Always save $lp in the stack.
@@ -328,3 +459,7 @@ Always save $lp in the stack.
 munaligned-access
 Target Report Var(flag_unaligned_access) Init(0)
 Enable unaligned word and halfword accesses to packed data.
+
+minline-asm-r15
+Target Report Var(flag_inline_asm_r15) Init(0)
+Allow use of r15 in inline ASM.
diff --git a/gcc/config/nds32/nds32_init.inc b/gcc/config/nds32/nds32_init.inc
new file mode 100644
index 00000000000..1084ad0e471
--- /dev/null
+++ b/gcc/config/nds32/nds32_init.inc
@@ -0,0 +1,43 @@
+/*
+ * nds32_init.inc
+ *
+ * NDS32 architecture startup assembler header file
+ *
+ */
+
+.macro nds32_init
+
+	! Initialize $gp with _SDA_BASE_ for data access
+	la      $gp, _SDA_BASE_
+
+#if defined(__NDS32_EXT_EX9__)
+	! Check HW for EX9: skip the ITB setup if bit 24 of $MSC_CFG is clear
+	mfsr    $r0, $MSC_CFG
+	li      $r1, (1 << 24)
+	and     $r2, $r0, $r1
+	beqz    $r2, 1f
+
+	! Initialize the table base of EX9 instruction ($ITB = _ITB_BASE_)
+	la      $r0, _ITB_BASE_
+	mtusr   $r0, $ITB
+1:
+#endif
+
+#if defined(__NDS32_EXT_FPU_DP__) || defined(__NDS32_EXT_FPU_SP__)
+	! Enable FPU (set bit 0 of $FUCOP_CTL)
+	mfsr    $r0, $FUCOP_CTL
+	ori     $r0, $r0, #0x1
+	mtsr    $r0, $FUCOP_CTL
+	dsb
+
+	! Enable denormalized flush-to-zero mode (set 0x1000 via fmfcsr/fmtcsr)
+	fmfcsr  $r0
+	ori     $r0,$r0,#0x1000
+	fmtcsr  $r0
+	dsb
+#endif
+
+	! Initialize default stack pointer to _stack
+	la      $sp, _stack
+
+.endm
diff --git a/gcc/config/nds32/nds32_intrinsic.h b/gcc/config/nds32/nds32_intrinsic.h
index 7bb117712dc..24cb2915491 100644
--- a/gcc/config/nds32/nds32_intrinsic.h
+++ b/gcc/config/nds32/nds32_intrinsic.h
@@ -26,6 +26,13 @@
 #ifndef _NDS32_INTRINSIC_H
 #define _NDS32_INTRINSIC_H
 
+typedef signed char int8x4_t __attribute__ ((vector_size (4)));
+typedef short int16x2_t __attribute__ ((vector_size (4)));
+typedef int int32x2_t __attribute__ ((vector_size (8)));
+typedef unsigned char uint8x4_t __attribute__ ((vector_size (4)));
+typedef unsigned short uint16x2_t __attribute__ ((vector_size (4)));
+typedef unsigned int uint32x2_t __attribute__ ((vector_size (8)));
+
 /* General instrinsic register names.  */
 enum nds32_intrinsic_registers
 {
@@ -691,6 +698,55 @@ enum nds32_dpref
 #define __nds32__tlbop_flua() \
 (__builtin_nds32_tlbop_flua())
 
+#define __nds32__kaddw(a, b) \
+  (__builtin_nds32_kaddw ((a), (b)))
+#define __nds32__kaddh(a, b) \
+  (__builtin_nds32_kaddh ((a), (b)))
+#define __nds32__ksubw(a, b) \
+  (__builtin_nds32_ksubw ((a), (b)))
+#define __nds32__ksubh(a, b) \
+  (__builtin_nds32_ksubh ((a), (b)))
+#define __nds32__kdmbb(a, b) \
+  (__builtin_nds32_kdmbb ((a), (b)))
+#define __nds32__v_kdmbb(a, b) \
+  (__builtin_nds32_v_kdmbb ((a), (b)))
+#define __nds32__kdmbt(a, b) \
+  (__builtin_nds32_kdmbt ((a), (b)))
+#define __nds32__v_kdmbt(a, b) \
+  (__builtin_nds32_v_kdmbt ((a), (b)))
+#define __nds32__kdmtb(a, b) \
+  (__builtin_nds32_kdmtb ((a), (b)))
+#define __nds32__v_kdmtb(a, b) \
+  (__builtin_nds32_v_kdmtb ((a), (b)))
+#define __nds32__kdmtt(a, b) \
+  (__builtin_nds32_kdmtt ((a), (b)))
+#define __nds32__v_kdmtt(a, b) \
+  (__builtin_nds32_v_kdmtt ((a), (b)))
+#define __nds32__khmbb(a, b) \
+  (__builtin_nds32_khmbb ((a), (b)))
+#define __nds32__v_khmbb(a, b) \
+  (__builtin_nds32_v_khmbb ((a), (b)))
+#define __nds32__khmbt(a, b) \
+  (__builtin_nds32_khmbt ((a), (b)))
+#define __nds32__v_khmbt(a, b) \
+  (__builtin_nds32_v_khmbt ((a), (b)))
+#define __nds32__khmtb(a, b) \
+  (__builtin_nds32_khmtb ((a), (b)))
+#define __nds32__v_khmtb(a, b) \
+  (__builtin_nds32_v_khmtb ((a), (b)))
+#define __nds32__khmtt(a, b) \
+  (__builtin_nds32_khmtt ((a), (b)))
+#define __nds32__v_khmtt(a, b) \
+  (__builtin_nds32_v_khmtt ((a), (b)))
+#define __nds32__kslraw(a, b) \
+  (__builtin_nds32_kslraw ((a), (b)))
+#define __nds32__kslraw_u(a, b) \
+  (__builtin_nds32_kslraw_u ((a), (b)))
+
+#define __nds32__rdov() \
+  (__builtin_nds32_rdov())
+#define __nds32__clrov() \
+  (__builtin_nds32_clrov())
 #define __nds32__gie_dis() \
   (__builtin_nds32_gie_dis())
 #define __nds32__gie_en() \
@@ -720,10 +776,622 @@ enum nds32_dpref
 #define __nds32__get_trig_type(a) \
   (__builtin_nds32_get_trig_type ((a)))
 
+#define __nds32__get_unaligned_hw(a) \
+  (__builtin_nds32_unaligned_load_hw ((a)))
+#define __nds32__get_unaligned_w(a) \
+  (__builtin_nds32_unaligned_load_w ((a)))
+#define __nds32__get_unaligned_dw(a) \
+  (__builtin_nds32_unaligned_load_dw ((a)))
+#define __nds32__put_unaligned_hw(a, data) \
+  (__builtin_nds32_unaligned_store_hw ((a), (data)))
+#define __nds32__put_unaligned_w(a, data) \
+  (__builtin_nds32_unaligned_store_w ((a), (data)))
+#define __nds32__put_unaligned_dw(a, data) \
+  (__builtin_nds32_unaligned_store_dw ((a), (data)))
+
+#define __nds32__add16(a, b) \
+  (__builtin_nds32_add16 ((a), (b)))
+#define __nds32__v_uadd16(a, b) \
+  (__builtin_nds32_v_uadd16 ((a), (b)))
+#define __nds32__v_sadd16(a, b) \
+  (__builtin_nds32_v_sadd16 ((a), (b)))
+#define __nds32__radd16(a, b) \
+  (__builtin_nds32_radd16 ((a), (b)))
+#define __nds32__v_radd16(a, b) \
+  (__builtin_nds32_v_radd16 ((a), (b)))
+#define __nds32__uradd16(a, b) \
+  (__builtin_nds32_uradd16 ((a), (b)))
+#define __nds32__v_uradd16(a, b) \
+  (__builtin_nds32_v_uradd16 ((a), (b)))
+#define __nds32__kadd16(a, b) \
+  (__builtin_nds32_kadd16 ((a), (b)))
+#define __nds32__v_kadd16(a, b) \
+  (__builtin_nds32_v_kadd16 ((a), (b)))
+#define __nds32__ukadd16(a, b) \
+  (__builtin_nds32_ukadd16 ((a), (b)))
+#define __nds32__v_ukadd16(a, b) \
+  (__builtin_nds32_v_ukadd16 ((a), (b)))
+#define __nds32__sub16(a, b) \
+  (__builtin_nds32_sub16 ((a), (b)))
+#define __nds32__v_usub16(a, b) \
+  (__builtin_nds32_v_usub16 ((a), (b)))
+#define __nds32__v_ssub16(a, b) \
+  (__builtin_nds32_v_ssub16 ((a), (b)))
+#define __nds32__rsub16(a, b) \
+  (__builtin_nds32_rsub16 ((a), (b)))
+#define __nds32__v_rsub16(a, b) \
+  (__builtin_nds32_v_rsub16 ((a), (b)))
+#define __nds32__ursub16(a, b) \
+  (__builtin_nds32_ursub16 ((a), (b)))
+#define __nds32__v_ursub16(a, b) \
+  (__builtin_nds32_v_ursub16 ((a), (b)))
+#define __nds32__ksub16(a, b) \
+  (__builtin_nds32_ksub16 ((a), (b)))
+#define __nds32__v_ksub16(a, b) \
+  (__builtin_nds32_v_ksub16 ((a), (b)))
+#define __nds32__uksub16(a, b) \
+  (__builtin_nds32_uksub16 ((a), (b)))
+#define __nds32__v_uksub16(a, b) \
+  (__builtin_nds32_v_uksub16 ((a), (b)))
+#define __nds32__cras16(a, b) \
+  (__builtin_nds32_cras16 ((a), (b)))
+#define __nds32__v_ucras16(a, b) \
+  (__builtin_nds32_v_ucras16 ((a), (b)))
+#define __nds32__v_scras16(a, b) \
+  (__builtin_nds32_v_scras16 ((a), (b)))
+#define __nds32__rcras16(a, b) \
+  (__builtin_nds32_rcras16 ((a), (b)))
+#define __nds32__v_rcras16(a, b) \
+  (__builtin_nds32_v_rcras16 ((a), (b)))
+#define __nds32__urcras16(a, b) \
+  (__builtin_nds32_urcras16 ((a), (b)))
+#define __nds32__v_urcras16(a, b) \
+  (__builtin_nds32_v_urcras16 ((a), (b)))
+#define __nds32__kcras16(a, b) \
+  (__builtin_nds32_kcras16 ((a), (b)))
+#define __nds32__v_kcras16(a, b) \
+  (__builtin_nds32_v_kcras16 ((a), (b)))
+#define __nds32__ukcras16(a, b) \
+  (__builtin_nds32_ukcras16 ((a), (b)))
+#define __nds32__v_ukcras16(a, b) \
+  (__builtin_nds32_v_ukcras16 ((a), (b)))
+#define __nds32__crsa16(a, b) \
+  (__builtin_nds32_crsa16 ((a), (b)))
+#define __nds32__v_ucrsa16(a, b) \
+  (__builtin_nds32_v_ucrsa16 ((a), (b)))
+#define __nds32__v_scrsa16(a, b) \
+  (__builtin_nds32_v_scrsa16 ((a), (b)))
+#define __nds32__rcrsa16(a, b) \
+  (__builtin_nds32_rcrsa16 ((a), (b)))
+#define __nds32__v_rcrsa16(a, b) \
+  (__builtin_nds32_v_rcrsa16 ((a), (b)))
+#define __nds32__urcrsa16(a, b) \
+  (__builtin_nds32_urcrsa16 ((a), (b)))
+#define __nds32__v_urcrsa16(a, b) \
+  (__builtin_nds32_v_urcrsa16 ((a), (b)))
+#define __nds32__kcrsa16(a, b) \
+  (__builtin_nds32_kcrsa16 ((a), (b)))
+#define __nds32__v_kcrsa16(a, b) \
+  (__builtin_nds32_v_kcrsa16 ((a), (b)))
+#define __nds32__ukcrsa16(a, b) \
+  (__builtin_nds32_ukcrsa16 ((a), (b)))
+#define __nds32__v_ukcrsa16(a, b) \
+  (__builtin_nds32_v_ukcrsa16 ((a), (b)))
+
+#define __nds32__add8(a, b) \
+  (__builtin_nds32_add8 ((a), (b)))
+#define __nds32__v_uadd8(a, b) \
+  (__builtin_nds32_v_uadd8 ((a), (b)))
+#define __nds32__v_sadd8(a, b) \
+  (__builtin_nds32_v_sadd8 ((a), (b)))
+#define __nds32__radd8(a, b) \
+  (__builtin_nds32_radd8 ((a), (b)))
+#define __nds32__v_radd8(a, b) \
+  (__builtin_nds32_v_radd8 ((a), (b)))
+#define __nds32__uradd8(a, b) \
+  (__builtin_nds32_uradd8 ((a), (b)))
+#define __nds32__v_uradd8(a, b) \
+  (__builtin_nds32_v_uradd8 ((a), (b)))
+#define __nds32__kadd8(a, b) \
+  (__builtin_nds32_kadd8 ((a), (b)))
+#define __nds32__v_kadd8(a, b) \
+  (__builtin_nds32_v_kadd8 ((a), (b)))
+#define __nds32__ukadd8(a, b) \
+  (__builtin_nds32_ukadd8 ((a), (b)))
+#define __nds32__v_ukadd8(a, b) \
+  (__builtin_nds32_v_ukadd8 ((a), (b)))
+#define __nds32__sub8(a, b) \
+  (__builtin_nds32_sub8 ((a), (b)))
+#define __nds32__v_usub8(a, b) \
+  (__builtin_nds32_v_usub8 ((a), (b)))
+#define __nds32__v_ssub8(a, b) \
+  (__builtin_nds32_v_ssub8 ((a), (b)))
+#define __nds32__rsub8(a, b) \
+  (__builtin_nds32_rsub8 ((a), (b)))
+#define __nds32__v_rsub8(a, b) \
+  (__builtin_nds32_v_rsub8 ((a), (b)))
+#define __nds32__ursub8(a, b) \
+  (__builtin_nds32_ursub8 ((a), (b)))
+#define __nds32__v_ursub8(a, b) \
+  (__builtin_nds32_v_ursub8 ((a), (b)))
+#define __nds32__ksub8(a, b) \
+  (__builtin_nds32_ksub8 ((a), (b)))
+#define __nds32__v_ksub8(a, b) \
+  (__builtin_nds32_v_ksub8 ((a), (b)))
+#define __nds32__uksub8(a, b) \
+  (__builtin_nds32_uksub8 ((a), (b)))
+#define __nds32__v_uksub8(a, b) \
+  (__builtin_nds32_v_uksub8 ((a), (b)))
+
+#define __nds32__sra16(a, b) \
+  (__builtin_nds32_sra16 ((a), (b)))
+#define __nds32__v_sra16(a, b) \
+  (__builtin_nds32_v_sra16 ((a), (b)))
+#define __nds32__sra16_u(a, b) \
+  (__builtin_nds32_sra16_u ((a), (b)))
+#define __nds32__v_sra16_u(a, b) \
+  (__builtin_nds32_v_sra16_u ((a), (b)))
+#define __nds32__srl16(a, b) \
+  (__builtin_nds32_srl16 ((a), (b)))
+#define __nds32__v_srl16(a, b) \
+  (__builtin_nds32_v_srl16 ((a), (b)))
+#define __nds32__srl16_u(a, b) \
+  (__builtin_nds32_srl16_u ((a), (b)))
+#define __nds32__v_srl16_u(a, b) \
+  (__builtin_nds32_v_srl16_u ((a), (b)))
+#define __nds32__sll16(a, b) \
+  (__builtin_nds32_sll16 ((a), (b)))
+#define __nds32__v_sll16(a, b) \
+  (__builtin_nds32_v_sll16 ((a), (b)))
+#define __nds32__ksll16(a, b) \
+  (__builtin_nds32_ksll16 ((a), (b)))
+#define __nds32__v_ksll16(a, b) \
+  (__builtin_nds32_v_ksll16 ((a), (b)))
+#define __nds32__kslra16(a, b) \
+  (__builtin_nds32_kslra16 ((a), (b)))
+#define __nds32__v_kslra16(a, b) \
+  (__builtin_nds32_v_kslra16 ((a), (b)))
+#define __nds32__kslra16_u(a, b) \
+  (__builtin_nds32_kslra16_u ((a), (b)))
+#define __nds32__v_kslra16_u(a, b) \
+  (__builtin_nds32_v_kslra16_u ((a), (b)))
+
+#define __nds32__cmpeq16(a, b) \
+  (__builtin_nds32_cmpeq16 ((a), (b)))
+#define __nds32__v_scmpeq16(a, b) \
+  (__builtin_nds32_v_scmpeq16 ((a), (b)))
+#define __nds32__v_ucmpeq16(a, b) \
+  (__builtin_nds32_v_ucmpeq16 ((a), (b)))
+#define __nds32__scmplt16(a, b) \
+  (__builtin_nds32_scmplt16 ((a), (b)))
+#define __nds32__v_scmplt16(a, b) \
+  (__builtin_nds32_v_scmplt16 ((a), (b)))
+#define __nds32__scmple16(a, b) \
+  (__builtin_nds32_scmple16 ((a), (b)))
+#define __nds32__v_scmple16(a, b) \
+  (__builtin_nds32_v_scmple16 ((a), (b)))
+#define __nds32__ucmplt16(a, b) \
+  (__builtin_nds32_ucmplt16 ((a), (b)))
+#define __nds32__v_ucmplt16(a, b) \
+  (__builtin_nds32_v_ucmplt16 ((a), (b)))
+#define __nds32__ucmple16(a, b) \
+  (__builtin_nds32_ucmple16 ((a), (b)))
+#define __nds32__v_ucmple16(a, b) \
+  (__builtin_nds32_v_ucmple16 ((a), (b)))
+
+#define __nds32__cmpeq8(a, b) \
+  (__builtin_nds32_cmpeq8 ((a), (b)))
+#define __nds32__v_scmpeq8(a, b) \
+  (__builtin_nds32_v_scmpeq8 ((a), (b)))
+#define __nds32__v_ucmpeq8(a, b) \
+  (__builtin_nds32_v_ucmpeq8 ((a), (b)))
+#define __nds32__scmplt8(a, b) \
+  (__builtin_nds32_scmplt8 ((a), (b)))
+#define __nds32__v_scmplt8(a, b) \
+  (__builtin_nds32_v_scmplt8 ((a), (b)))
+#define __nds32__scmple8(a, b) \
+  (__builtin_nds32_scmple8 ((a), (b)))
+#define __nds32__v_scmple8(a, b) \
+  (__builtin_nds32_v_scmple8 ((a), (b)))
+#define __nds32__ucmplt8(a, b) \
+  (__builtin_nds32_ucmplt8 ((a), (b)))
+#define __nds32__v_ucmplt8(a, b) \
+  (__builtin_nds32_v_ucmplt8 ((a), (b)))
+#define __nds32__ucmple8(a, b) \
+  (__builtin_nds32_ucmple8 ((a), (b)))
+#define __nds32__v_ucmple8(a, b) \
+  (__builtin_nds32_v_ucmple8 ((a), (b)))
+
+#define __nds32__smin16(a, b) \
+  (__builtin_nds32_smin16 ((a), (b)))
+#define __nds32__v_smin16(a, b) \
+  (__builtin_nds32_v_smin16 ((a), (b)))
+#define __nds32__umin16(a, b) \
+  (__builtin_nds32_umin16 ((a), (b)))
+#define __nds32__v_umin16(a, b) \
+  (__builtin_nds32_v_umin16 ((a), (b)))
+#define __nds32__smax16(a, b) \
+  (__builtin_nds32_smax16 ((a), (b)))
+#define __nds32__v_smax16(a, b) \
+  (__builtin_nds32_v_smax16 ((a), (b)))
+#define __nds32__umax16(a, b) \
+  (__builtin_nds32_umax16 ((a), (b)))
+#define __nds32__v_umax16(a, b) \
+  (__builtin_nds32_v_umax16 ((a), (b)))
+#define __nds32__sclip16(a, b) \
+  (__builtin_nds32_sclip16 ((a), (b)))
+#define __nds32__v_sclip16(a, b) \
+  (__builtin_nds32_v_sclip16 ((a), (b)))
+#define __nds32__uclip16(a, b) \
+  (__builtin_nds32_uclip16 ((a), (b)))
+#define __nds32__v_uclip16(a, b) \
+  (__builtin_nds32_v_uclip16 ((a), (b)))
+#define __nds32__khm16(a, b) \
+  (__builtin_nds32_khm16 ((a), (b)))
+#define __nds32__v_khm16(a, b) \
+  (__builtin_nds32_v_khm16 ((a), (b)))
+#define __nds32__khmx16(a, b) \
+  (__builtin_nds32_khmx16 ((a), (b)))
+#define __nds32__v_khmx16(a, b) \
+  (__builtin_nds32_v_khmx16 ((a), (b)))
+#define __nds32__kabs16(a) \
+  (__builtin_nds32_kabs16 ((a)))
+#define __nds32__v_kabs16(a) \
+  (__builtin_nds32_v_kabs16 ((a)))
+
+#define __nds32__smin8(a, b) \
+  (__builtin_nds32_smin8 ((a), (b)))
+#define __nds32__v_smin8(a, b) \
+  (__builtin_nds32_v_smin8 ((a), (b)))
+#define __nds32__umin8(a, b) \
+  (__builtin_nds32_umin8 ((a), (b)))
+#define __nds32__v_umin8(a, b) \
+  (__builtin_nds32_v_umin8 ((a), (b)))
+#define __nds32__smax8(a, b) \
+  (__builtin_nds32_smax8 ((a), (b)))
+#define __nds32__v_smax8(a, b) \
+  (__builtin_nds32_v_smax8 ((a), (b)))
+#define __nds32__umax8(a, b) \
+  (__builtin_nds32_umax8 ((a), (b)))
+#define __nds32__v_umax8(a, b) \
+  (__builtin_nds32_v_umax8 ((a), (b)))
+#define __nds32__kabs8(a) \
+  (__builtin_nds32_kabs8 ((a)))
+#define __nds32__v_kabs8(a) \
+  (__builtin_nds32_v_kabs8 ((a)))
+
+#define __nds32__sunpkd810(a) \
+  (__builtin_nds32_sunpkd810 ((a)))
+#define __nds32__v_sunpkd810(a) \
+  (__builtin_nds32_v_sunpkd810 ((a)))
+#define __nds32__sunpkd820(a) \
+  (__builtin_nds32_sunpkd820 ((a)))
+#define __nds32__v_sunpkd820(a) \
+  (__builtin_nds32_v_sunpkd820 ((a)))
+#define __nds32__sunpkd830(a) \
+  (__builtin_nds32_sunpkd830 ((a)))
+#define __nds32__v_sunpkd830(a) \
+  (__builtin_nds32_v_sunpkd830 ((a)))
+#define __nds32__sunpkd831(a) \
+  (__builtin_nds32_sunpkd831 ((a)))
+#define __nds32__v_sunpkd831(a) \
+  (__builtin_nds32_v_sunpkd831 ((a)))
+#define __nds32__zunpkd810(a) \
+  (__builtin_nds32_zunpkd810 ((a)))
+#define __nds32__v_zunpkd810(a) \
+  (__builtin_nds32_v_zunpkd810 ((a)))
+#define __nds32__zunpkd820(a) \
+  (__builtin_nds32_zunpkd820 ((a)))
+#define __nds32__v_zunpkd820(a) \
+  (__builtin_nds32_v_zunpkd820 ((a)))
+#define __nds32__zunpkd830(a) \
+  (__builtin_nds32_zunpkd830 ((a)))
+#define __nds32__v_zunpkd830(a) \
+  (__builtin_nds32_v_zunpkd830 ((a)))
+#define __nds32__zunpkd831(a) \
+  (__builtin_nds32_zunpkd831 ((a)))
+#define __nds32__v_zunpkd831(a) \
+  (__builtin_nds32_v_zunpkd831 ((a)))
+
+#define __nds32__raddw(a, b) \
+  (__builtin_nds32_raddw ((a), (b)))
+#define __nds32__uraddw(a, b) \
+  (__builtin_nds32_uraddw ((a), (b)))
+#define __nds32__rsubw(a, b) \
+  (__builtin_nds32_rsubw ((a), (b)))
+#define __nds32__ursubw(a, b) \
+  (__builtin_nds32_ursubw ((a), (b)))
+
+#define __nds32__sra_u(a, b) \
+  (__builtin_nds32_sra_u ((a), (b)))
+#define __nds32__ksll(a, b) \
+  (__builtin_nds32_ksll ((a), (b)))
+#define __nds32__pkbb16(a, b) \
+  (__builtin_nds32_pkbb16 ((a), (b)))
+#define __nds32__v_pkbb16(a, b) \
+  (__builtin_nds32_v_pkbb16 ((a), (b)))
+#define __nds32__pkbt16(a, b) \
+  (__builtin_nds32_pkbt16 ((a), (b)))
+#define __nds32__v_pkbt16(a, b) \
+  (__builtin_nds32_v_pkbt16 ((a), (b)))
+#define __nds32__pktb16(a, b) \
+  (__builtin_nds32_pktb16 ((a), (b)))
+#define __nds32__v_pktb16(a, b) \
+  (__builtin_nds32_v_pktb16 ((a), (b)))
+#define __nds32__pktt16(a, b) \
+  (__builtin_nds32_pktt16 ((a), (b)))
+#define __nds32__v_pktt16(a, b) \
+  (__builtin_nds32_v_pktt16 ((a), (b)))
+
+#define __nds32__smmul(a, b) \
+  (__builtin_nds32_smmul ((a), (b)))
+#define __nds32__smmul_u(a, b) \
+  (__builtin_nds32_smmul_u ((a), (b)))
+#define __nds32__kmmac(r, a, b) \
+  (__builtin_nds32_kmmac ((r), (a), (b)))
+#define __nds32__kmmac_u(r, a, b) \
+  (__builtin_nds32_kmmac_u ((r), (a), (b)))
+#define __nds32__kmmsb(r, a, b) \
+  (__builtin_nds32_kmmsb ((r), (a), (b)))
+#define __nds32__kmmsb_u(r, a, b) \
+  (__builtin_nds32_kmmsb_u ((r), (a), (b)))
+#define __nds32__kwmmul(a, b) \
+  (__builtin_nds32_kwmmul ((a), (b)))
+#define __nds32__kwmmul_u(a, b) \
+  (__builtin_nds32_kwmmul_u ((a), (b)))
+
+#define __nds32__smmwb(a, b) \
+  (__builtin_nds32_smmwb ((a), (b)))
+#define __nds32__v_smmwb(a, b) \
+  (__builtin_nds32_v_smmwb ((a), (b)))
+#define __nds32__smmwb_u(a, b) \
+  (__builtin_nds32_smmwb_u ((a), (b)))
+#define __nds32__v_smmwb_u(a, b) \
+  (__builtin_nds32_v_smmwb_u ((a), (b)))
+#define __nds32__smmwt(a, b) \
+  (__builtin_nds32_smmwt ((a), (b)))
+#define __nds32__v_smmwt(a, b) \
+  (__builtin_nds32_v_smmwt ((a), (b)))
+#define __nds32__smmwt_u(a, b) \
+  (__builtin_nds32_smmwt_u ((a), (b)))
+#define __nds32__v_smmwt_u(a, b) \
+  (__builtin_nds32_v_smmwt_u ((a), (b)))
+#define __nds32__kmmawb(r, a, b) \
+  (__builtin_nds32_kmmawb ((r), (a), (b)))
+#define __nds32__v_kmmawb(r, a, b) \
+  (__builtin_nds32_v_kmmawb ((r), (a), (b)))
+#define __nds32__kmmawb_u(r, a, b) \
+  (__builtin_nds32_kmmawb_u ((r), (a), (b)))
+#define __nds32__v_kmmawb_u(r, a, b) \
+  (__builtin_nds32_v_kmmawb_u ((r), (a), (b)))
+#define __nds32__kmmawt(r, a, b) \
+  (__builtin_nds32_kmmawt ((r), (a), (b)))
+#define __nds32__v_kmmawt(r, a, b) \
+  (__builtin_nds32_v_kmmawt ((r), (a), (b)))
+#define __nds32__kmmawt_u(r, a, b) \
+  (__builtin_nds32_kmmawt_u ((r), (a), (b)))
+#define __nds32__v_kmmawt_u(r, a, b) \
+  (__builtin_nds32_v_kmmawt_u ((r), (a), (b)))
+
+#define __nds32__smbb(a, b) \
+  (__builtin_nds32_smbb ((a), (b)))
+#define __nds32__v_smbb(a, b) \
+  (__builtin_nds32_v_smbb ((a), (b)))
+#define __nds32__smbt(a, b) \
+  (__builtin_nds32_smbt ((a), (b)))
+#define __nds32__v_smbt(a, b) \
+  (__builtin_nds32_v_smbt ((a), (b)))
+#define __nds32__smtt(a, b) \
+  (__builtin_nds32_smtt ((a), (b)))
+#define __nds32__v_smtt(a, b) \
+  (__builtin_nds32_v_smtt ((a), (b)))
+#define __nds32__kmda(a, b) \
+  (__builtin_nds32_kmda ((a), (b)))
+#define __nds32__v_kmda(a, b) \
+  (__builtin_nds32_v_kmda ((a), (b)))
+#define __nds32__kmxda(a, b) \
+  (__builtin_nds32_kmxda ((a), (b)))
+#define __nds32__v_kmxda(a, b) \
+  (__builtin_nds32_v_kmxda ((a), (b)))
+#define __nds32__smds(a, b) \
+  (__builtin_nds32_smds ((a), (b)))
+#define __nds32__v_smds(a, b) \
+  (__builtin_nds32_v_smds ((a), (b)))
+#define __nds32__smdrs(a, b) \
+  (__builtin_nds32_smdrs ((a), (b)))
+#define __nds32__v_smdrs(a, b) \
+  (__builtin_nds32_v_smdrs ((a), (b)))
+#define __nds32__smxds(a, b) \
+  (__builtin_nds32_smxds ((a), (b)))
+#define __nds32__v_smxds(a, b) \
+  (__builtin_nds32_v_smxds ((a), (b)))
+#define __nds32__kmabb(r, a, b) \
+  (__builtin_nds32_kmabb ((r), (a), (b)))
+#define __nds32__v_kmabb(r, a, b) \
+  (__builtin_nds32_v_kmabb ((r), (a), (b)))
+#define __nds32__kmabt(r, a, b) \
+  (__builtin_nds32_kmabt ((r), (a), (b)))
+#define __nds32__v_kmabt(r, a, b) \
+  (__builtin_nds32_v_kmabt ((r), (a), (b)))
+#define __nds32__kmatt(r, a, b) \
+  (__builtin_nds32_kmatt ((r), (a), (b)))
+#define __nds32__v_kmatt(r, a, b) \
+  (__builtin_nds32_v_kmatt ((r), (a), (b)))
+#define __nds32__kmada(r, a, b) \
+  (__builtin_nds32_kmada ((r), (a), (b)))
+#define __nds32__v_kmada(r, a, b) \
+  (__builtin_nds32_v_kmada ((r), (a), (b)))
+#define __nds32__kmaxda(r, a, b) \
+  (__builtin_nds32_kmaxda ((r), (a), (b)))
+#define __nds32__v_kmaxda(r, a, b) \
+  (__builtin_nds32_v_kmaxda ((r), (a), (b)))
+#define __nds32__kmads(r, a, b) \
+  (__builtin_nds32_kmads ((r), (a), (b)))
+#define __nds32__v_kmads(r, a, b) \
+  (__builtin_nds32_v_kmads ((r), (a), (b)))
+#define __nds32__kmadrs(r, a, b) \
+  (__builtin_nds32_kmadrs ((r), (a), (b)))
+#define __nds32__v_kmadrs(r, a, b) \
+  (__builtin_nds32_v_kmadrs ((r), (a), (b)))
+#define __nds32__kmaxds(r, a, b) \
+  (__builtin_nds32_kmaxds ((r), (a), (b)))
+#define __nds32__v_kmaxds(r, a, b) \
+  (__builtin_nds32_v_kmaxds ((r), (a), (b)))
+#define __nds32__kmsda(r, a, b) \
+  (__builtin_nds32_kmsda ((r), (a), (b)))
+#define __nds32__v_kmsda(r, a, b) \
+  (__builtin_nds32_v_kmsda ((r), (a), (b)))
+#define __nds32__kmsxda(r, a, b) \
+  (__builtin_nds32_kmsxda ((r), (a), (b)))
+#define __nds32__v_kmsxda(r, a, b) \
+  (__builtin_nds32_v_kmsxda ((r), (a), (b)))
+
+#define __nds32__smal(a, b) \
+  (__builtin_nds32_smal ((a), (b)))
+#define __nds32__v_smal(a, b) \
+  (__builtin_nds32_v_smal ((a), (b)))
+
+#define __nds32__bitrev(a, b) \
+  (__builtin_nds32_bitrev ((a), (b)))
+#define __nds32__wext(a, b) \
+  (__builtin_nds32_wext ((a), (b)))
+#define __nds32__bpick(r, a, b) \
+  (__builtin_nds32_bpick ((r), (a), (b)))
+#define __nds32__insb(r, a, b) \
+  (__builtin_nds32_insb ((r), (a), (b)))
+
+#define __nds32__sadd64(a, b) \
+  (__builtin_nds32_sadd64 ((a), (b)))
+#define __nds32__uadd64(a, b) \
+  (__builtin_nds32_uadd64 ((a), (b)))
+#define __nds32__radd64(a, b) \
+  (__builtin_nds32_radd64 ((a), (b)))
+#define __nds32__uradd64(a, b) \
+  (__builtin_nds32_uradd64 ((a), (b)))
+#define __nds32__kadd64(a, b) \
+  (__builtin_nds32_kadd64 ((a), (b)))
+#define __nds32__ukadd64(a, b) \
+  (__builtin_nds32_ukadd64 ((a), (b)))
+#define __nds32__ssub64(a, b) \
+  (__builtin_nds32_ssub64 ((a), (b)))
+#define __nds32__usub64(a, b) \
+  (__builtin_nds32_usub64 ((a), (b)))
+#define __nds32__rsub64(a, b) \
+  (__builtin_nds32_rsub64 ((a), (b)))
+#define __nds32__ursub64(a, b) \
+  (__builtin_nds32_ursub64 ((a), (b)))
+#define __nds32__ksub64(a, b) \
+  (__builtin_nds32_ksub64 ((a), (b)))
+#define __nds32__uksub64(a, b) \
+  (__builtin_nds32_uksub64 ((a), (b)))
+
+#define __nds32__smar64(r, a, b) \
+  (__builtin_nds32_smar64 ((r), (a), (b)))
+#define __nds32__smsr64(r, a, b) \
+  (__builtin_nds32_smsr64 ((r), (a), (b)))
+#define __nds32__umar64(r, a, b) \
+  (__builtin_nds32_umar64 ((r), (a), (b)))
+#define __nds32__umsr64(r, a, b) \
+  (__builtin_nds32_umsr64 ((r), (a), (b)))
+#define __nds32__kmar64(r, a, b) \
+  (__builtin_nds32_kmar64 ((r), (a), (b)))
+#define __nds32__kmsr64(r, a, b) \
+  (__builtin_nds32_kmsr64 ((r), (a), (b)))
+#define __nds32__ukmar64(r, a, b) \
+  (__builtin_nds32_ukmar64 ((r), (a), (b)))
+#define __nds32__ukmsr64(r, a, b) \
+  (__builtin_nds32_ukmsr64 ((r), (a), (b)))
+
+#define __nds32__smalbb(r, a, b) \
+  (__builtin_nds32_smalbb ((r), (a), (b)))
+#define __nds32__v_smalbb(r, a, b) \
+  (__builtin_nds32_v_smalbb ((r), (a), (b)))
+#define __nds32__smalbt(r, a, b) \
+  (__builtin_nds32_smalbt ((r), (a), (b)))
+#define __nds32__v_smalbt(r, a, b) \
+  (__builtin_nds32_v_smalbt ((r), (a), (b)))
+#define __nds32__smaltt(r, a, b) \
+  (__builtin_nds32_smaltt ((r), (a), (b)))
+#define __nds32__v_smaltt(r, a, b) \
+  (__builtin_nds32_v_smaltt ((r), (a), (b)))
+#define __nds32__smalda(r, a, b) \
+  (__builtin_nds32_smalda ((r), (a), (b)))
+#define __nds32__v_smalda(r, a, b) \
+  (__builtin_nds32_v_smalda ((r), (a), (b)))
+#define __nds32__smalxda(r, a, b) \
+  (__builtin_nds32_smalxda ((r), (a), (b)))
+#define __nds32__v_smalxda(r, a, b) \
+  (__builtin_nds32_v_smalxda ((r), (a), (b)))
+#define __nds32__smalds(r, a, b) \
+  (__builtin_nds32_smalds ((r), (a), (b)))
+#define __nds32__v_smalds(r, a, b) \
+  (__builtin_nds32_v_smalds ((r), (a), (b)))
+#define __nds32__smaldrs(r, a, b) \
+  (__builtin_nds32_smaldrs ((r), (a), (b)))
+#define __nds32__v_smaldrs(r, a, b) \
+  (__builtin_nds32_v_smaldrs ((r), (a), (b)))
+#define __nds32__smalxds(r, a, b) \
+  (__builtin_nds32_smalxds ((r), (a), (b)))
+#define __nds32__v_smalxds(r, a, b) \
+  (__builtin_nds32_v_smalxds ((r), (a), (b)))
+#define __nds32__smslda(r, a, b) \
+  (__builtin_nds32_smslda ((r), (a), (b)))
+#define __nds32__v_smslda(r, a, b) \
+  (__builtin_nds32_v_smslda ((r), (a), (b)))
+#define __nds32__smslxda(r, a, b) \
+  (__builtin_nds32_smslxda ((r), (a), (b)))
+#define __nds32__v_smslxda(r, a, b) \
+  (__builtin_nds32_v_smslxda ((r), (a), (b)))
+
+#define __nds32__smul16(a, b) \
+  (__builtin_nds32_smul16 ((a), (b)))
+#define __nds32__v_smul16(a, b) \
+  (__builtin_nds32_v_smul16 ((a), (b)))
+#define __nds32__smulx16(a, b) \
+  (__builtin_nds32_smulx16 ((a), (b)))
+#define __nds32__v_smulx16(a, b) \
+  (__builtin_nds32_v_smulx16 ((a), (b)))
+#define __nds32__umul16(a, b) \
+  (__builtin_nds32_umul16 ((a), (b)))
+#define __nds32__v_umul16(a, b) \
+  (__builtin_nds32_v_umul16 ((a), (b)))
+#define __nds32__umulx16(a, b) \
+  (__builtin_nds32_umulx16 ((a), (b)))
+#define __nds32__v_umulx16(a, b) \
+  (__builtin_nds32_v_umulx16 ((a), (b)))
+
+#define __nds32__uclip32(a, imm) \
+  (__builtin_nds32_uclip32 ((a), (imm)))
+#define __nds32__sclip32(a, imm) \
+  (__builtin_nds32_sclip32 ((a), (imm)))
+#define __nds32__kabs(a) \
+  (__builtin_nds32_kabs ((a)))
+
 #define __nds32__unaligned_feature() \
   (__builtin_nds32_unaligned_feature())
 #define __nds32__enable_unaligned() \
   (__builtin_nds32_enable_unaligned())
 #define __nds32__disable_unaligned() \
   (__builtin_nds32_disable_unaligned())
+
+#define __nds32__get_unaligned_u16x2(a) \
+  (__builtin_nds32_get_unaligned_u16x2 ((a)))
+#define __nds32__get_unaligned_s16x2(a) \
+  (__builtin_nds32_get_unaligned_s16x2 ((a)))
+#define __nds32__get_unaligned_u8x4(a) \
+  (__builtin_nds32_get_unaligned_u8x4 ((a)))
+#define __nds32__get_unaligned_s8x4(a) \
+  (__builtin_nds32_get_unaligned_s8x4 ((a)))
+
+#define __nds32__put_unaligned_u16x2(a, data) \
+  (__builtin_nds32_put_unaligned_u16x2 ((a), (data)))
+#define __nds32__put_unaligned_s16x2(a, data) \
+  (__builtin_nds32_put_unaligned_s16x2 ((a), (data)))
+#define __nds32__put_unaligned_u8x4(a, data) \
+  (__builtin_nds32_put_unaligned_u8x4 ((a), (data)))
+#define __nds32__put_unaligned_s8x4(a, data) \
+  (__builtin_nds32_put_unaligned_s8x4 ((a), (data)))
+
+#define NDS32ATTR_SIGNATURE              __attribute__((signature))
+
 #endif /* nds32_intrinsic.h */
diff --git a/gcc/config/nds32/nds32_isr.h b/gcc/config/nds32/nds32_isr.h
new file mode 100644
index 00000000000..8ea58f951e1
--- /dev/null
+++ b/gcc/config/nds32/nds32_isr.h
@@ -0,0 +1,526 @@
+/* Intrinsic definitions of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _NDS32_ISR_H
+#define _NDS32_ISR_H
+
+/* Attribute of an interrupt or exception handler:
+
+   NDS32_READY_NESTED: Interruptible if the user re-enables the GIE bit.
+   NDS32_NESTED      : This handler is interruptible.  This is not suitable
+                       for exception handlers.
+   NDS32_NOT_NESTED  : This handler is NOT interruptible.  Users have to do
+                       some work if nesting is wanted.
+   NDS32_CRITICAL    : This handler is a critical ISR, which means it is
+                       small and efficient.  */
+#define NDS32_READY_NESTED   0
+#define NDS32_NESTED         1
+#define NDS32_NOT_NESTED     2
+#define NDS32_CRITICAL       3
+
+/* Attribute of an interrupt or exception handler:
+
+   NDS32_SAVE_CALLER_REGS: Save partial registers.
+   NDS32_SAVE_ALL_REGS   : Save all registers in a table.  */
+#define NDS32_SAVE_CALLER_REGS   0
+#define NDS32_SAVE_ALL_REGS      1
+
+/* There are two versions of the register table for interrupt and exception
+   handlers, one for 16-register CPUs and the other for 32-register CPUs.
+   These structures are used for context switching or system call handling.
+   The address of this data can be obtained from the handler's input argument.
+
+   For system call handling, r0 to r5 are used to pass arguments.  If more
+   arguments are used they are put into the stack and its starting address is
+   in sp.  Return value of system call can be put into r0 and r1 upon exit from
+   system call handler.  System call ID is in a system register and it can be
+   fetched via intrinsic function.  For more information please read ABI and
+   other related documents.
+
+   For context switching, at least 2 values need to be saved in the kernel.
+   One is IPC and the other is the stack address of the current task.  Use an
+   intrinsic function to get IPC, and the input argument of the handler
+   function + 8 to get the stack address of the current task.  To do context
+   switching, replace new_sp with the stack address of the new task and replace
+   the IPC system register with the IPC of the new task, then just return from
+   the handler.  The context switch will then happen.  */
+
+/* Register table for exception handler; 32-register version.  */
+typedef struct
+{
+  int r0;
+  int r1;
+  int r2;
+  int r3;
+  int r4;
+  int r5;
+  int r6;
+  int r7;
+  int r8;
+  int r9;
+  int r10;
+  int r11;
+  int r12;
+  int r13;
+  int r14;
+  int r15;
+  int r16;
+  int r17;
+  int r18;
+  int r19;
+  int r20;
+  int r21;
+  int r22;
+  int r23;
+  int r24;
+  int r25;
+  int r26;
+  int r27;
+  int fp;
+  int gp;
+  int lp;
+  int sp;
+} NDS32_GPR32;
+
+/* Register table for exception handler; 16-register version.  */
+typedef struct
+{
+  int r0;
+  int r1;
+  int r2;
+  int r3;
+  int r4;
+  int r5;
+  int r6;
+  int r7;
+  int r8;
+  int r9;
+  int r10;
+  int r15;
+  int fp;
+  int gp;
+  int lp;
+  int sp;
+} NDS32_GPR16;
+
+
+/* Use NDS32_REG32_TAB or NDS32_REG16_TAB in your program to
+   access register table.  */
+typedef struct
+{
+  union
+    {
+      int          reg_a[32] ;
+      NDS32_GPR32  reg_s ;
+    } u ;
+} NDS32_REG32_TAB;
+
+typedef struct
+{
+  union
+    {
+      int          reg_a[16] ;
+      NDS32_GPR16  reg_s ;
+    } u ;
+} NDS32_REG16_TAB;
+
+typedef struct
+{
+  int    d0lo;
+  int    d0hi;
+  int    d1lo;
+  int    d1hi;
+} NDS32_DX_TAB;
+
+typedef struct
+{
+#ifdef __NDS32_EB__
+  float    fsr0;
+  float    fsr1;
+  float    fsr2;
+  float    fsr3;
+  float    fsr4;
+  float    fsr5;
+  float    fsr6;
+  float    fsr7;
+#else
+  float    fsr1;
+  float    fsr0;
+  float    fsr3;
+  float    fsr2;
+  float    fsr5;
+  float    fsr4;
+  float    fsr7;
+  float    fsr6;
+#endif
+} NDS32_FSR8;
+
+typedef struct
+{
+  double   dsr0;
+  double   dsr1;
+  double   dsr2;
+  double   dsr3;
+} NDS32_DSR4;
+
+typedef struct
+{
+#ifdef __NDS32_EB__
+  float    fsr0;
+  float    fsr1;
+  float    fsr2;
+  float    fsr3;
+  float    fsr4;
+  float    fsr5;
+  float    fsr6;
+  float    fsr7;
+  float    fsr8;
+  float    fsr9;
+  float    fsr10;
+  float    fsr11;
+  float    fsr12;
+  float    fsr13;
+  float    fsr14;
+  float    fsr15;
+#else
+  float    fsr1;
+  float    fsr0;
+  float    fsr3;
+  float    fsr2;
+  float    fsr5;
+  float    fsr4;
+  float    fsr7;
+  float    fsr6;
+  float    fsr9;
+  float    fsr8;
+  float    fsr11;
+  float    fsr10;
+  float    fsr13;
+  float    fsr12;
+  float    fsr15;
+  float    fsr14;
+#endif
+} NDS32_FSR16;
+
+typedef struct
+{
+  double   dsr0;
+  double   dsr1;
+  double   dsr2;
+  double   dsr3;
+  double   dsr4;
+  double   dsr5;
+  double   dsr6;
+  double   dsr7;
+} NDS32_DSR8;
+
+typedef struct
+{
+#ifdef __NDS32_EB__
+  float    fsr0;
+  float    fsr1;
+  float    fsr2;
+  float    fsr3;
+  float    fsr4;
+  float    fsr5;
+  float    fsr6;
+  float    fsr7;
+  float    fsr8;
+  float    fsr9;
+  float    fsr10;
+  float    fsr11;
+  float    fsr12;
+  float    fsr13;
+  float    fsr14;
+  float    fsr15;
+  float    fsr16;
+  float    fsr17;
+  float    fsr18;
+  float    fsr19;
+  float    fsr20;
+  float    fsr21;
+  float    fsr22;
+  float    fsr23;
+  float    fsr24;
+  float    fsr25;
+  float    fsr26;
+  float    fsr27;
+  float    fsr28;
+  float    fsr29;
+  float    fsr30;
+  float    fsr31;
+#else
+  float    fsr1;
+  float    fsr0;
+  float    fsr3;
+  float    fsr2;
+  float    fsr5;
+  float    fsr4;
+  float    fsr7;
+  float    fsr6;
+  float    fsr9;
+  float    fsr8;
+  float    fsr11;
+  float    fsr10;
+  float    fsr13;
+  float    fsr12;
+  float    fsr15;
+  float    fsr14;
+  float    fsr17;
+  float    fsr16;
+  float    fsr19;
+  float    fsr18;
+  float    fsr21;
+  float    fsr20;
+  float    fsr23;
+  float    fsr22;
+  float    fsr25;
+  float    fsr24;
+  float    fsr27;
+  float    fsr26;
+  float    fsr29;
+  float    fsr28;
+  float    fsr31;
+  float    fsr30;
+#endif
+} NDS32_FSR32;
+
+typedef struct
+{
+  double   dsr0;
+  double   dsr1;
+  double   dsr2;
+  double   dsr3;
+  double   dsr4;
+  double   dsr5;
+  double   dsr6;
+  double   dsr7;
+  double   dsr8;
+  double   dsr9;
+  double   dsr10;
+  double   dsr11;
+  double   dsr12;
+  double   dsr13;
+  double   dsr14;
+  double   dsr15;
+} NDS32_DSR16;
+
+typedef struct
+{
+  double   dsr0;
+  double   dsr1;
+  double   dsr2;
+  double   dsr3;
+  double   dsr4;
+  double   dsr5;
+  double   dsr6;
+  double   dsr7;
+  double   dsr8;
+  double   dsr9;
+  double   dsr10;
+  double   dsr11;
+  double   dsr12;
+  double   dsr13;
+  double   dsr14;
+  double   dsr15;
+  double   dsr16;
+  double   dsr17;
+  double   dsr18;
+  double   dsr19;
+  double   dsr20;
+  double   dsr21;
+  double   dsr22;
+  double   dsr23;
+  double   dsr24;
+  double   dsr25;
+  double   dsr26;
+  double   dsr27;
+  double   dsr28;
+  double   dsr29;
+  double   dsr30;
+  double   dsr31;
+} NDS32_DSR32;
+
+typedef struct
+{
+  union
+    {
+      NDS32_FSR8   fsr_s ;
+      NDS32_DSR4   dsr_s ;
+    } u ;
+} NDS32_FPU8_TAB;
+
+typedef struct
+{
+  union
+    {
+      NDS32_FSR16  fsr_s ;
+      NDS32_DSR8   dsr_s ;
+    } u ;
+} NDS32_FPU16_TAB;
+
+typedef struct
+{
+  union
+    {
+      NDS32_FSR32  fsr_s ;
+      NDS32_DSR16  dsr_s ;
+    } u ;
+} NDS32_FPU32_TAB;
+
+typedef struct
+{
+  union
+    {
+      NDS32_FSR32  fsr_s ;
+      NDS32_DSR32  dsr_s ;
+    } u ;
+} NDS32_FPU64_TAB;
+
+typedef struct
+{
+  int    ipc;
+  int    ipsw;
+#if defined(NDS32_EXT_FPU_CONFIG_0)
+  NDS32_FPU8_TAB fpr;
+#elif defined(NDS32_EXT_FPU_CONFIG_1)
+  NDS32_FPU16_TAB fpr;
+#elif defined(NDS32_EXT_FPU_CONFIG_2)
+  NDS32_FPU32_TAB fpr;
+#elif defined(NDS32_EXT_FPU_CONFIG_3)
+  NDS32_FPU64_TAB fpr;
+#endif
+#if __NDS32_DX_REGS__
+  NDS32_DX_TAB dxr;
+#endif
+#if __NDS32_EXT_IFC__
+  int    ifc_lp;
+  int    filler;
+#endif
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
+  NDS32_REG16_TAB gpr;
+#else
+  NDS32_REG32_TAB gpr;
+#endif
+} NDS32_CONTEXT;
+
+/* Predefined Vector Definition.
+
+   For IVIC Mode: 9 to 14 are for hardware interrupt
+                  and 15 is for software interrupt.
+   For EVIC Mode: 9 to 72 are for hardware interrupt
+                  and software interrupt can be routed to any one of them.
+
+   You may want to define your hardware interrupts in the following way
+   for easy maintenance.
+
+     IVIC mode:
+       #define MY_HW_IVIC_TIMER (NDS32_VECTOR_INTERRUPT_HW0 + 1)
+       #define MY_HW_IVIC_USB   (NDS32_VECTOR_INTERRUPT_HW0 + 3)
+     EVIC mode:
+       #define MY_HW_EVIC_DMA   (NDS32_VECTOR_INTERRUPT_HW0 + 2)
+       #define MY_HW_EVIC_SWI   (NDS32_VECTOR_INTERRUPT_HW0 + 10)  */
+#define NDS32_VECTOR_RESET               0
+#define NDS32_VECTOR_TLB_FILL            1
+#define NDS32_VECTOR_PTE_NOT_PRESENT     2
+#define NDS32_VECTOR_TLB_MISC            3
+#define NDS32_VECTOR_TLB_VLPT_MISS       4
+#define NDS32_VECTOR_MACHINE_ERROR       5
+#define NDS32_VECTOR_DEBUG_RELATED       6
+#define NDS32_VECTOR_GENERAL_EXCEPTION   7
+#define NDS32_VECTOR_SYSCALL             8
+#define NDS32_VECTOR_INTERRUPT_HW0       9
+#define NDS32_VECTOR_INTERRUPT_HW1       10
+#define NDS32_VECTOR_INTERRUPT_HW2       11
+#define NDS32_VECTOR_INTERRUPT_HW3       12
+#define NDS32_VECTOR_INTERRUPT_HW4       13
+#define NDS32_VECTOR_INTERRUPT_HW5       14
+#define NDS32_VECTOR_INTERRUPT_HW6       15
+#define NDS32_VECTOR_SWI                 15  /* THIS IS FOR IVIC MODE ONLY */
+#define NDS32_VECTOR_INTERRUPT_HW7       16
+#define NDS32_VECTOR_INTERRUPT_HW8       17
+#define NDS32_VECTOR_INTERRUPT_HW9       18
+#define NDS32_VECTOR_INTERRUPT_HW10      19
+#define NDS32_VECTOR_INTERRUPT_HW11      20
+#define NDS32_VECTOR_INTERRUPT_HW12      21
+#define NDS32_VECTOR_INTERRUPT_HW13      22
+#define NDS32_VECTOR_INTERRUPT_HW14      23
+#define NDS32_VECTOR_INTERRUPT_HW15      24
+#define NDS32_VECTOR_INTERRUPT_HW16      25
+#define NDS32_VECTOR_INTERRUPT_HW17      26
+#define NDS32_VECTOR_INTERRUPT_HW18      27
+#define NDS32_VECTOR_INTERRUPT_HW19      28
+#define NDS32_VECTOR_INTERRUPT_HW20      29
+#define NDS32_VECTOR_INTERRUPT_HW21      30
+#define NDS32_VECTOR_INTERRUPT_HW22      31
+#define NDS32_VECTOR_INTERRUPT_HW23      32
+#define NDS32_VECTOR_INTERRUPT_HW24      33
+#define NDS32_VECTOR_INTERRUPT_HW25      34
+#define NDS32_VECTOR_INTERRUPT_HW26      35
+#define NDS32_VECTOR_INTERRUPT_HW27      36
+#define NDS32_VECTOR_INTERRUPT_HW28      37
+#define NDS32_VECTOR_INTERRUPT_HW29      38
+#define NDS32_VECTOR_INTERRUPT_HW30      39
+#define NDS32_VECTOR_INTERRUPT_HW31      40
+#define NDS32_VECTOR_INTERRUPT_HW32      41
+#define NDS32_VECTOR_INTERRUPT_HW33      42
+#define NDS32_VECTOR_INTERRUPT_HW34      43
+#define NDS32_VECTOR_INTERRUPT_HW35      44
+#define NDS32_VECTOR_INTERRUPT_HW36      45
+#define NDS32_VECTOR_INTERRUPT_HW37      46
+#define NDS32_VECTOR_INTERRUPT_HW38      47
+#define NDS32_VECTOR_INTERRUPT_HW39      48
+#define NDS32_VECTOR_INTERRUPT_HW40      49
+#define NDS32_VECTOR_INTERRUPT_HW41      50
+#define NDS32_VECTOR_INTERRUPT_HW42      51
+#define NDS32_VECTOR_INTERRUPT_HW43      52
+#define NDS32_VECTOR_INTERRUPT_HW44      53
+#define NDS32_VECTOR_INTERRUPT_HW45      54
+#define NDS32_VECTOR_INTERRUPT_HW46      55
+#define NDS32_VECTOR_INTERRUPT_HW47      56
+#define NDS32_VECTOR_INTERRUPT_HW48      57
+#define NDS32_VECTOR_INTERRUPT_HW49      58
+#define NDS32_VECTOR_INTERRUPT_HW50      59
+#define NDS32_VECTOR_INTERRUPT_HW51      60
+#define NDS32_VECTOR_INTERRUPT_HW52      61
+#define NDS32_VECTOR_INTERRUPT_HW53      62
+#define NDS32_VECTOR_INTERRUPT_HW54      63
+#define NDS32_VECTOR_INTERRUPT_HW55      64
+#define NDS32_VECTOR_INTERRUPT_HW56      65
+#define NDS32_VECTOR_INTERRUPT_HW57      66
+#define NDS32_VECTOR_INTERRUPT_HW58      67
+#define NDS32_VECTOR_INTERRUPT_HW59      68
+#define NDS32_VECTOR_INTERRUPT_HW60      69
+#define NDS32_VECTOR_INTERRUPT_HW61      70
+#define NDS32_VECTOR_INTERRUPT_HW62      71
+#define NDS32_VECTOR_INTERRUPT_HW63      72
+
+#define NDS32ATTR_RESET(option)          __attribute__((reset(option)))
+#define NDS32ATTR_EXCEPT(type)           __attribute__((exception(type)))
+#define NDS32ATTR_EXCEPTION(type)        __attribute__((exception(type)))
+#define NDS32ATTR_INTERRUPT(type)        __attribute__((interrupt(type)))
+#define NDS32ATTR_ISR(type)              __attribute__((interrupt(type)))
+
+#endif /* nds32_isr.h */
diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md
index 34288076f42..12cd2623f1c 100644
--- a/gcc/config/nds32/pipelines.md
+++ b/gcc/config/nds32/pipelines.md
@@ -43,6 +43,24 @@
 (include "nds32-n9-2r1w.md")
 
 
+;; ------------------------------------------------------------------------
+;; Include N10 pipeline settings.
+;; ------------------------------------------------------------------------
+(include "nds32-n10.md")
+
+
+;; ------------------------------------------------------------------------
+;; Include Graywolf pipeline settings.
+;; ------------------------------------------------------------------------
+(include "nds32-graywolf.md")
+
+
+;; ------------------------------------------------------------------------
+;; Include N12/N13 pipeline settings.
+;; ------------------------------------------------------------------------
+(include "nds32-n13.md")
+
+
 ;; ------------------------------------------------------------------------
 ;; Define simple pipeline settings.
 ;; ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md
index 9eb84685514..ee4cf3cf48e 100644
--- a/gcc/config/nds32/predicates.md
+++ b/gcc/config/nds32/predicates.md
@@ -40,7 +40,15 @@
   (match_code "mult,and,ior,xor"))
 
 (define_predicate "nds32_symbolic_operand"
-  (match_code "const,symbol_ref,label_ref"))
+  (and (match_code "const,symbol_ref,label_ref")
+       (match_test "!(TARGET_ICT_MODEL_LARGE
+		      && nds32_indirect_call_referenced_p (op))")))
+
+(define_predicate "nds32_nonunspec_symbolic_operand"
+  (and (match_code "const,symbol_ref,label_ref")
+       (match_test "!flag_pic && nds32_const_unspec_p (op)
+		    && !(TARGET_ICT_MODEL_LARGE
+			 && nds32_indirect_call_referenced_p (op))")))
 
 (define_predicate "nds32_reg_constant_operand"
   (ior (match_operand 0 "register_operand")
@@ -56,14 +64,51 @@
        (and (match_operand 0 "const_int_operand")
 	    (match_test "satisfies_constraint_Is11 (op)"))))
 
+(define_predicate "nds32_imm_0_1_operand"
+  (and (match_operand 0 "const_int_operand")
+       (ior (match_test "satisfies_constraint_Iv00 (op)")
+	    (match_test "satisfies_constraint_Iv01 (op)"))))
+
+(define_predicate "nds32_imm_1_2_operand"
+  (and (match_operand 0 "const_int_operand")
+       (ior (match_test "satisfies_constraint_Iv01 (op)")
+	    (match_test "satisfies_constraint_Iv02 (op)"))))
+
+(define_predicate "nds32_imm_1_2_4_8_operand"
+  (and (match_operand 0 "const_int_operand")
+       (ior (ior (match_test "satisfies_constraint_Iv01 (op)")
+		 (match_test "satisfies_constraint_Iv02 (op)"))
+	    (ior (match_test "satisfies_constraint_Iv04 (op)")
+		 (match_test "satisfies_constraint_Iv08 (op)")))))
+
+(define_predicate "nds32_imm2u_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_Iu02 (op)")))
+
+(define_predicate "nds32_imm4u_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_Iu04 (op)")))
+
 (define_predicate "nds32_imm5u_operand"
   (and (match_operand 0 "const_int_operand")
        (match_test "satisfies_constraint_Iu05 (op)")))
 
+(define_predicate "nds32_imm6u_operand"
+  (and (match_operand 0 "const_int_operand")
+       (match_test "satisfies_constraint_Iu06 (op)")))
+
+(define_predicate "nds32_rimm4u_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "nds32_imm4u_operand")))
+
 (define_predicate "nds32_rimm5u_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "nds32_imm5u_operand")))
 
+(define_predicate "nds32_rimm6u_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "nds32_imm6u_operand")))
+
 (define_predicate "nds32_move_operand"
   (and (match_operand 0 "general_operand")
        (not (match_code "high,const,symbol_ref,label_ref")))
@@ -78,6 +123,20 @@
   return true;
 })
 
+(define_predicate "nds32_vmove_operand"
+  (and (match_operand 0 "general_operand")
+       (not (match_code "high,const,symbol_ref,label_ref")))
+{
+  /* If the constant vector op satisfies neither CVs2 nor CVhi,
+     we can not perform move behavior by a single instruction.  */
+  if (GET_CODE (op) == CONST_VECTOR
+      && !satisfies_constraint_CVs2 (op)
+      && !satisfies_constraint_CVhi (op))
+    return false;
+
+  return true;
+})
+
 (define_predicate "nds32_and_operand"
   (match_operand 0 "nds32_reg_constant_operand")
 {
@@ -127,6 +186,15 @@
   (ior (match_operand 0 "nds32_symbolic_operand")
        (match_operand 0 "nds32_general_register_operand")))
 
+(define_predicate "nds32_insv_operand"
+  (match_code "const_int")
+{
+  return INTVAL (op) == 0
+	 || INTVAL (op) == 8
+	 || INTVAL (op) == 16
+	 || INTVAL (op) == 24;
+})
+
 (define_predicate "nds32_lmw_smw_base_operand"
   (and (match_code "mem")
        (match_test "nds32_valid_smw_lwm_base_p (op)")))
diff --git a/gcc/config/nds32/t-elf b/gcc/config/nds32/t-elf
new file mode 100644
index 00000000000..3401dae4881
--- /dev/null
+++ b/gcc/config/nds32/t-elf
@@ -0,0 +1,42 @@
+# The multilib settings of Andes NDS32 cpu for GNU compiler
+# Copyright (C) 2012-2018 Free Software Foundation, Inc.
+# Contributed by Andes Technology Corporation.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the
+# driver program which options are defaults for this target and thus
+# do not need to be handled specially.
+MULTILIB_OPTIONS += mcmodel=small/mcmodel=medium/mcmodel=large mvh
+
+ifneq ($(filter graywolf,$(TM_MULTILIB_CONFIG)),)
+MULTILIB_OPTIONS += mcpu=graywolf
+endif
+
+ifneq ($(filter dsp,$(TM_MULTILIB_CONFIG)),)
+MULTILIB_OPTIONS += mext-dsp
+endif
+
+ifneq ($(filter zol,$(TM_MULTILIB_CONFIG)),)
+MULTILIB_OPTIONS += mext-zol
+endif
+
+ifneq ($(filter v3m+,$(TM_MULTILIB_CONFIG)),)
+MULTILIB_OPTIONS += march=v3m+
+endif
+
+# ------------------------------------------------------------------------
diff --git a/gcc/config/nds32/t-linux b/gcc/config/nds32/t-linux
new file mode 100644
index 00000000000..33328f65e7b
--- /dev/null
+++ b/gcc/config/nds32/t-linux
@@ -0,0 +1,26 @@
+# The multilib settings of Andes NDS32 cpu for GNU compiler
+# Copyright (C) 2012-2018 Free Software Foundation, Inc.
+# Contributed by Andes Technology Corporation.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the
+# driver program which options are defaults for this target and thus
+# do not need to be handled specially.
+MULTILIB_OPTIONS +=
+
+# ------------------------------------------------------------------------
diff --git a/gcc/configure b/gcc/configure
index 6121e163259..07a485e8598 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -27784,7 +27784,7 @@ esac
 # version to the per-target configury.
 case "$cpu_type" in
   aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \
-  | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \
+  | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu \
   | tilegx | tilepro | visium | xstormy16 | xtensa)
     insn="nop"
     ;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index b066cc609e1..ae73df30b42 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -4910,7 +4910,7 @@ esac
 # version to the per-target configury.
 case "$cpu_type" in
   aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \
-  | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \
+  | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu \
   | tilegx | tilepro | visium | xstormy16 | xtensa)
     insn="nop"
     ;;
diff --git a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c
index 4eeb8c7a30b..6cd02bc2aa8 100644
--- a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c
+++ b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c
@@ -1,4 +1,5 @@
 /* { dg-skip-if "requires frame pointers" { *-*-* } "-fomit-frame-pointer" "" } */
+/* { dg-additional-options "-malways-save-lp" { target nds32*-*-* } } */
 /* { dg-require-effective-target return_address } */
 
 extern void exit (int);
diff --git a/gcc/testsuite/gcc.dg/lower-subreg-1.c b/gcc/testsuite/gcc.dg/lower-subreg-1.c
index 6bae73055a9..4a5099bfbdb 100644
--- a/gcc/testsuite/gcc.dg/lower-subreg-1.c
+++ b/gcc/testsuite/gcc.dg/lower-subreg-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */
+/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* nds32*-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */
 /* { dg-options "-O -fdump-rtl-subreg1" } */
 /* { dg-additional-options "-mno-stv" { target ia32 } } */
 /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } } */
diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c
index 45d2c7b6aae..b9ae9dc6030 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-1.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-1.c
@@ -2,6 +2,7 @@
 /* { dg-options "-fstack-usage" } */
 /* nvptx doesn't have a reg allocator, and hence no stack usage data.  */
 /* { dg-skip-if "" { nvptx-*-* } } */
+/* { dg-options "-fstack-usage -fno-omit-frame-pointer" { target { nds32*-*-* } } } */
 
 /* This is aimed at testing basic support for -fstack-usage in the back-ends.
    See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c).
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c
deleted file mode 100644
index 2dceed98ac8..00000000000
--- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c
+++ /dev/null
@@ -1,11 +0,0 @@
-/* Verify that we generate setgie.d instruction with builtin function.  */
-
-/* { dg-do compile }  */
-/* { dg-options "-O0" }  */
-/* { dg-final { scan-assembler "\\tsetgie.d" } }  */
-
-void
-test (void)
-{
-  __builtin_nds32_setgie_dis ();
-}
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c
deleted file mode 100644
index 892887019c9..00000000000
--- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c
+++ /dev/null
@@ -1,11 +0,0 @@
-/* Verify that we generate setgie.e instruction with builtin function.  */
-
-/* { dg-do compile }  */
-/* { dg-options "-O0" }  */
-/* { dg-final { scan-assembler "\\tsetgie.e" } }  */
-
-void
-test (void)
-{
-  __builtin_nds32_setgie_en ();
-}
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c
new file mode 100644
index 00000000000..3b4eede295d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c
@@ -0,0 +1,36 @@
+/* This is a test program for checking gie with
+   mtsr/mfsr instruction.  */
+
+/* { dg-do run } */
+/* { dg-options "-O0" } */
+
+#include <nds32_intrinsic.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  unsigned int psw;
+  unsigned int gie;
+  unsigned int pfm_ctl;
+
+  __nds32__setgie_en ();
+  __nds32__dsb(); /* This is needed for waiting pipeline.  */
+  psw = __nds32__mfsr (NDS32_SR_PSW);
+
+  gie = psw & 0x00000001;
+
+  if (gie != 1)
+    abort ();
+
+  psw = psw & 0xFFFFFFFE;
+  __nds32__mtsr (psw,NDS32_SR_PSW);
+  __nds32__dsb(); /* This is needed for waiting pipeline.  */
+  psw = __nds32__mfsr (NDS32_SR_PSW);
+  gie = psw & 0x00000001;
+
+  if (gie != 0)
+    abort ();
+  else
+   exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c
new file mode 100644
index 00000000000..fce90e9720b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+void
+main (void)
+{
+ __nds32__clr_pending_hwint (NDS32_INT_H0);
+ __nds32__clr_pending_hwint (NDS32_INT_H1);
+ __nds32__clr_pending_hwint (NDS32_INT_H2);
+
+ __nds32__clr_pending_hwint (NDS32_INT_H15);
+ __nds32__clr_pending_hwint (NDS32_INT_H16);
+ __nds32__clr_pending_hwint (NDS32_INT_H31);
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c
new file mode 100644
index 00000000000..08e1dd0c83f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+void
+main (void)
+{
+  __nds32__clr_pending_swint ();
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c
new file mode 100644
index 00000000000..a3a1f44fce5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+void
+main (void)
+{
+  __nds32__disable_int (NDS32_INT_H15);
+  __nds32__disable_int (NDS32_INT_H16);
+  __nds32__disable_int (NDS32_INT_H31);
+  __nds32__disable_int (NDS32_INT_SWI);
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c
new file mode 100644
index 00000000000..e18ed7a9ff0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+void
+main (void)
+{
+  __nds32__enable_int (NDS32_INT_H15);
+  __nds32__enable_int (NDS32_INT_H16);
+  __nds32__enable_int (NDS32_INT_H31);
+  __nds32__enable_int (NDS32_INT_SWI);
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c
new file mode 100644
index 00000000000..4ced0a55d96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+int
+main (void)
+{
+  int a = __nds32__get_pending_int (NDS32_INT_H15);
+  int b = __nds32__get_pending_int (NDS32_INT_SWI);
+  int c = __nds32__get_pending_int (NDS32_INT_H16);
+
+  return a + b + c;
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c
new file mode 100644
index 00000000000..a394a60958a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+int
+main (void)
+{
+  int a = __nds32__get_trig_type (NDS32_INT_H0);
+  int b = __nds32__get_trig_type (NDS32_INT_H15);
+  int c = __nds32__get_trig_type (NDS32_INT_H16);
+  int d = __nds32__get_trig_type (NDS32_INT_H31);
+  return a + b + c + d;
+}
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c
similarity index 100%
rename from gcc/testsuite/gcc.target/nds32/builtin-isb.c
rename to gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isync.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c
similarity index 100%
rename from gcc/testsuite/gcc.target/nds32/builtin-isync.c
rename to gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c
similarity index 100%
rename from gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c
rename to gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c
diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c
similarity index 100%
rename from gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c
rename to gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c
new file mode 100644
index 00000000000..f10b83d2d60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+int
+main (void)
+{
+  __nds32__set_pending_swint ();
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c
new file mode 100644
index 00000000000..bd8178c7165
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+void
+main (void)
+{
+  __nds32__set_trig_type_edge (NDS32_INT_H0);
+  __nds32__set_trig_type_edge (NDS32_INT_H15);
+  __nds32__set_trig_type_edge (NDS32_INT_H16);
+  __nds32__set_trig_type_edge (NDS32_INT_H31);
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c
new file mode 100644
index 00000000000..17805433280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include <nds32_intrinsic.h>
+
+void
+main (void)
+{
+  __nds32__set_trig_type_level (NDS32_INT_H0);
+  __nds32__set_trig_type_level (NDS32_INT_H15);
+  __nds32__set_trig_type_level (NDS32_INT_H16);
+  __nds32__set_trig_type_level (NDS32_INT_H31);
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c
new file mode 100644
index 00000000000..e143d3fefb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c
@@ -0,0 +1,13 @@
+/* Verify that we generate setgie.d instruction with builtin function.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+/* { dg-final { scan-assembler "\\tsetgie.d" } } */
+
+#include <nds32_intrinsic.h>
+
+void
+test (void)
+{
+  __nds32__setgie_dis ();
+}
diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c
new file mode 100644
index 00000000000..ed95782ee5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c
@@ -0,0 +1,13 @@
+/* Verify that we generate setgie.e instruction with builtin function.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+/* { dg-final { scan-assembler "\\tsetgie.e" } } */
+
+#include <nds32_intrinsic.h>
+
+void
+test (void)
+{
+  __nds32__setgie_en ();
+}
diff --git a/gcc/testsuite/gcc.target/nds32/nds32.exp b/gcc/testsuite/gcc.target/nds32/nds32.exp
index 44ce72d2583..2f1bff60d67 100644
--- a/gcc/testsuite/gcc.target/nds32/nds32.exp
+++ b/gcc/testsuite/gcc.target/nds32/nds32.exp
@@ -38,8 +38,10 @@ if ![info exists DEFAULT_CFLAGS] then {
 dg-init
 
 # Main loop.
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/compile/*.\[cS\]]] \
 	"" $DEFAULT_CFLAGS
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
+	"" ""
 
 # All done.
 dg-finish
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 50665dfd30e..fbf7998b7ed 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8767,6 +8767,7 @@ proc check_effective_target_logical_op_short_circuit {} {
 	 || [istarget avr*-*-*]
 	 || [istarget crisv32-*-*] || [istarget cris-*-*]
 	 || [istarget mmix-*-*]
+	 || [istarget nds32*-*-*]
 	 || [istarget s390*-*-*]
 	 || [istarget powerpc*-*-*]
 	 || [istarget nios2*-*-*]
diff --git a/libgcc/config.host b/libgcc/config.host
index 11b4acaff55..fbbc9219d68 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -974,6 +974,23 @@ msp430*-*-elf)
 	tmake_file="$tm_file t-crtstuff t-fdpbit msp430/t-msp430"
         extra_parts="$extra_parts libmul_none.a libmul_16.a libmul_32.a libmul_f5.a"
 	;;
+nds32*-linux*)
+	# Makefile fragments for shared libgcc, the nds32 glibc port, crt stuff
+	# and soft-fp; no c-isr library is appended for this target.
+	tmake_file="${tmake_file} t-slibgcc-libgcc"
+	tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp"
+	# The header file of defining MD_FALLBACK_FRAME_STATE_FOR.
+	md_unwind_header=nds32/linux-unwind.h
+	# Validate the --with-nds32-lib=X setting; only glibc, uclibc, or unset is accepted.
+	case "${with_nds32_lib}" in
+	"" | glibc | uclibc )
+		;;
+	*)
+		echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: glibc uclibc" 1>&2
+		exit 1
+		;;
+	esac
+	;;
 nds32*-elf*)
 	# Basic makefile fragment and extra_parts for crt stuff.
 	# We also append c-isr library implementation.
diff --git a/libgcc/config/nds32/initfini.c b/libgcc/config/nds32/initfini.c
index 49ca44fa659..dfbcc43f776 100644
--- a/libgcc/config/nds32/initfini.c
+++ b/libgcc/config/nds32/initfini.c
@@ -25,6 +25,10 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <stddef.h>
+/* Need header file for `struct object' type.  */
+#include "../libgcc/unwind-dw2-fde.h"
+
 /*  Declare a pointer to void function type.  */
 typedef void (*func_ptr) (void);
 
@@ -42,11 +46,59 @@ typedef void (*func_ptr) (void);
    refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__
    symbol in crtinit.o, where they are defined.  */
 
-static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors")))
-     = { (func_ptr) (-1) };
+static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"), used))
+     = { (func_ptr) 0 };
+
+static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"), used))
+     = { (func_ptr) 0 };
+
+
+#ifdef SUPPORT_UNWINDING_DWARF2
+/* Preparation of exception handling with dwarf2 mechanism registration.  */
 
-static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
-     = { (func_ptr) (-1) };
+asm ("\n\
+	.section .eh_frame,\"aw\",@progbits\n\
+	.global __EH_FRAME_BEGIN__\n\
+	.type	__EH_FRAME_BEGIN__, @object\n\
+	.align 2\n\
+__EH_FRAME_BEGIN__:\n\
+	! Beginning location of eh_frame section\n\
+	.previous\n\
+");
+
+extern func_ptr __EH_FRAME_BEGIN__[];
+
+
+/* Note that the following two functions are going to be chained into
+   constructor and destructor list, respectively.  So these two declarations
+   must be placed after __CTOR_LIST__ and __DTOR_LIST__.  */
+extern void __nds32_register_eh(void) __attribute__((constructor, used));
+extern void __nds32_deregister_eh(void) __attribute__((destructor, used));
+
+/* Register the exception handling table at __EH_FRAME_BEGIN__ as the first constructor.  */
+void
+__nds32_register_eh (void)
+{
+  static struct object object;  /* Static storage handed to __register_frame_info.  */
+  if (__register_frame_info)  /* Call only when the symbol's address is non-null.  */
+    __register_frame_info (__EH_FRAME_BEGIN__, &object);
+}
+
+/* Unregister the exception handling table at __EH_FRAME_BEGIN__ as a destructor.  */
+void
+__nds32_deregister_eh (void)
+{
+  static int completed = 0;  /* Nonzero after the first call; later calls are no-ops.  */
+
+  if (completed)
+    return;
+
+  if (__deregister_frame_info)  /* Call only when the symbol's address is non-null.  */
+    __deregister_frame_info (__EH_FRAME_BEGIN__);
+
+  completed = 1;
+}
+#endif
 
 /* Run all the global destructors on exit from the program.  */
 
@@ -63,7 +115,7 @@ static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
    same particular root executable or shared library file.  */
 
 static void __do_global_dtors (void)
-asm ("__do_global_dtors") __attribute__ ((section (".text")));
+asm ("__do_global_dtors") __attribute__ ((section (".text"), used));
 
 static void
 __do_global_dtors (void)
@@ -116,23 +168,37 @@ void *__dso_handle = 0;
    last, these words naturally end up at the very ends of the two lists
    contained in these two sections.  */
 
-static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors")))
+static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"), used))
      = { (func_ptr) 0 };
 
-static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors")))
+static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"), used))
      = { (func_ptr) 0 };
 
+#ifdef SUPPORT_UNWINDING_DWARF2
+/* ZERO terminator in .eh_frame section.  */
+asm ("\n\
+	.section .eh_frame,\"aw\",@progbits\n\
+	.global __EH_FRAME_END__\n\
+	.type	__EH_FRAME_END__, @object\n\
+	.align 2\n\
+__EH_FRAME_END__:\n\
+	! End location of eh_frame section with ZERO terminator\n\
+	.word 0\n\
+	.previous\n\
+");
+#endif
+
 /* Run all global constructors for the program.
    Note that they are run in reverse order.  */
 
 static void __do_global_ctors (void)
-asm ("__do_global_ctors") __attribute__ ((section (".text")));
+asm ("__do_global_ctors") __attribute__ ((section (".text"), used));
 
 static void
 __do_global_ctors (void)
 {
   func_ptr *p;
-  for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--)
+  for (p = __CTOR_END__ - 1; *p; p--)
     (*p) ();
 }
 
diff --git a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc
index 5cc1a6fc88a..275e5580ef3 100644
--- a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc
+++ b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc
@@ -26,13 +26,26 @@
 .macro ADJ_INTR_LVL
 #if defined(NDS32_NESTED) /* Nested handler.  */
 	mfsr	$r3, $PSW
+	/* By subtracting 1 from $PSW, we can lower PSW.INTL
+	   and enable GIE simultaneously.  */
 	addi	$r3, $r3, #-0x1
+  #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
+    ori   $r3, $r3, 0x2000  /* Set PSW.AEN(b'13) */
+  #endif
 	mtsr	$r3, $PSW
 #elif defined(NDS32_NESTED_READY) /* Nested ready handler.  */
 	/* Save ipc and ipsw and lower INT level.  */
 	mfsr	$r3, $PSW
 	addi	$r3, $r3, #-0x2
+  #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
+    ori   $r3, $r3, 0x2000  /* Set PSW.AEN(b'13) */
+  #endif
 	mtsr	$r3, $PSW
 #else /* Not nested handler.  */
+  #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
+    mfsr	$r3, $PSW
+    ori   $r3, $r3, 0x2000  /* Set PSW.AEN(b'13) */
+    mtsr	$r3, $PSW
+  #endif
 #endif
 .endm
diff --git a/libgcc/config/nds32/isr-library/excp_isr.S b/libgcc/config/nds32/isr-library/excp_isr.S
index f24f856e6ee..6e7de5f8fb5 100644
--- a/libgcc/config/nds32/isr-library/excp_isr.S
+++ b/libgcc/config/nds32/isr-library/excp_isr.S
@@ -23,6 +23,7 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include "save_usr_regs.inc"
 #include "save_mac_regs.inc"
 #include "save_fpu_regs.inc"
 #include "save_fpu_regs_00.inc"
@@ -32,35 +33,33 @@
 #include "save_all.inc"
 #include "save_partial.inc"
 #include "adj_intr_lvl.inc"
-#include "restore_mac_regs.inc"
 #include "restore_fpu_regs_00.inc"
 #include "restore_fpu_regs_01.inc"
 #include "restore_fpu_regs_02.inc"
 #include "restore_fpu_regs_03.inc"
 #include "restore_fpu_regs.inc"
+#include "restore_mac_regs.inc"
+#include "restore_usr_regs.inc"
 #include "restore_all.inc"
 #include "restore_partial.inc"
+
 	.section .nds32_isr, "ax"       /* Put it in the section of 1st level handler. */
 	.align	1
-/*
-  First Level Handlers
-  1. First Level Handlers are invokded in vector section via jump instruction
-  with specific names for different configurations.
-  2. Naming Format: _nds32_e_SR_NT for exception handlers.
-		    _nds32_i_SR_NT for interrupt handlers.
-  2.1 All upper case letters are replaced with specific lower case letters encodings.
-  2.2 SR: Saved Registers
-      sa: Save All regs (context)
-      ps: Partial Save (all caller-saved regs)
-  2.3 NT: Nested Type
-      ns: nested
-      nn: not nested
-      nr: nested ready
-*/
-
-/*
-  This is original 16-byte vector size version.
-*/
+
+/* First Level Handlers
+   1. First Level Handlers are invoked in vector section via jump instruction
+      with specific names for different configurations.
+   2. Naming Format: _nds32_e_SR_NT for exception handlers.
+                     _nds32_i_SR_NT for interrupt handlers.
+     2.1 All upper case letters are replaced with specific lower case letters encodings.
+     2.2 SR -- Saved Registers
+         sa: Save All regs (context)
+         ps: Partial Save (all caller-saved regs)
+     2.3 NT -- Nested Type
+         ns: nested
+         nn: not nested
+         nr: nested ready */
+
 #ifdef NDS32_SAVE_ALL_REGS
 #if defined(NDS32_NESTED)
 	.globl	_nds32_e_sa_ns
@@ -91,21 +90,26 @@ _nds32_e_ps_nn:
 #endif /* endif for Nest Type */
 #endif /* not NDS32_SAVE_ALL_REGS */
 
-/*
-  This is 16-byte vector size version.
-  The vector id was restored into $r0 in vector by compiler.
-*/
+
+/* For 4-byte vector size version, the vector id is
+   extracted from $ITYPE and is set into $r0 by library.
+   For 16-byte vector size version, the vector id
+   is set into $r0 in vector section by compiler.  */
+
+/* Save used registers.  */
 #ifdef NDS32_SAVE_ALL_REGS
         SAVE_ALL
 #else
         SAVE_PARTIAL
 #endif
+
 	/* Prepare to call 2nd level handler. */
 	la	$r2, _nds32_jmptbl_00
 	lw	$r2, [$r2 + $r0 << #2]
 	ADJ_INTR_LVL	/* Adjust INTR level. $r3 is clobbered.  */
 	jral    $r2
-	/* Restore used registers. */
+
+/* Restore used registers.  */
 #ifdef NDS32_SAVE_ALL_REGS
 	RESTORE_ALL
 #else
@@ -113,6 +117,7 @@ _nds32_e_ps_nn:
 #endif
 	iret
 
+
 #ifdef NDS32_SAVE_ALL_REGS
 #if defined(NDS32_NESTED)
 	.size	_nds32_e_sa_ns, .-_nds32_e_sa_ns
diff --git a/libgcc/config/nds32/isr-library/intr_isr.S b/libgcc/config/nds32/isr-library/intr_isr.S
index 0431ac114fb..23ffa100206 100644
--- a/libgcc/config/nds32/isr-library/intr_isr.S
+++ b/libgcc/config/nds32/isr-library/intr_isr.S
@@ -23,6 +23,7 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include "save_usr_regs.inc"
 #include "save_mac_regs.inc"
 #include "save_fpu_regs.inc"
 #include "save_fpu_regs_00.inc"
@@ -32,35 +33,33 @@
 #include "save_all.inc"
 #include "save_partial.inc"
 #include "adj_intr_lvl.inc"
-#include "restore_mac_regs.inc"
 #include "restore_fpu_regs_00.inc"
 #include "restore_fpu_regs_01.inc"
 #include "restore_fpu_regs_02.inc"
 #include "restore_fpu_regs_03.inc"
 #include "restore_fpu_regs.inc"
+#include "restore_mac_regs.inc"
+#include "restore_usr_regs.inc"
 #include "restore_all.inc"
 #include "restore_partial.inc"
+
 	.section .nds32_isr, "ax"       /* Put it in the section of 1st level handler. */
 	.align	1
-/*
-  First Level Handlers
-  1. First Level Handlers are invokded in vector section via jump instruction
-  with specific names for different configurations.
-  2. Naming Format: _nds32_e_SR_NT for exception handlers.
-		    _nds32_i_SR_NT for interrupt handlers.
-  2.1 All upper case letters are replaced with specific lower case letters encodings.
-  2.2 SR: Saved Registers
-      sa: Save All regs (context)
-      ps: Partial Save (all caller-saved regs)
-  2.3 NT: Nested Type
-      ns: nested
-      nn: not nested
-      nr: nested ready
-*/
-
-/*
-  This is original 16-byte vector size version.
-*/
+
+/* First Level Handlers
+   1. First Level Handlers are invoked in vector section via jump instruction
+      with specific names for different configurations.
+   2. Naming Format: _nds32_e_SR_NT for exception handlers.
+                     _nds32_i_SR_NT for interrupt handlers.
+     2.1 All upper case letters are replaced with specific lower case letters encodings.
+     2.2 SR -- Saved Registers
+         sa: Save All regs (context)
+         ps: Partial Save (all caller-saved regs)
+     2.3 NT -- Nested Type
+         ns: nested
+         nn: not nested
+         nr: nested ready */
+
 #ifdef NDS32_SAVE_ALL_REGS
 #if defined(NDS32_NESTED)
 	.globl	_nds32_i_sa_ns
@@ -91,21 +90,36 @@ _nds32_i_ps_nn:
 #endif /* endif for Nest Type */
 #endif /* not NDS32_SAVE_ALL_REGS */
 
-/*
-  This is 16-byte vector size version.
-  The vector id was restored into $r0 in vector by compiler.
-*/
+
+/* For 4-byte vector size version, the vector id is
+   extracted from $ITYPE and is set into $r0 by library.
+   For 16-byte vector size version, the vector id
+   is set into $r0 in vector section by compiler.  */
+
+/* Save used registers first.  */
 #ifdef NDS32_SAVE_ALL_REGS
         SAVE_ALL
 #else
         SAVE_PARTIAL
 #endif
-	/* Prepare to call 2nd level handler. */
+
+/* According to vector size, we need to have different implementation.  */
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* Prepare to call 2nd level handler.  */
+	la	$r2, _nds32_jmptbl_00
+	lw	$r2, [$r2 + $r0 << #2]
+	addi    $r0, $r0, #-9	/* Make interrupt vector id zero-based.  */
+	ADJ_INTR_LVL	/* Adjust INTR level.  $r3 is clobbered.  */
+	jral    $r2
+#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */
+	/* Prepare to call 2nd level handler.  */
 	la	$r2, _nds32_jmptbl_09	/* For zero-based vcetor id.  */
 	lw	$r2, [$r2 + $r0 << #2]
 	ADJ_INTR_LVL	/* Adjust INTR level. $r3 is clobbered.  */
 	jral    $r2
-	/* Restore used registers. */
+#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */
+
+/* Restore used registers.  */
 #ifdef NDS32_SAVE_ALL_REGS
 	RESTORE_ALL
 #else
@@ -113,6 +127,7 @@ _nds32_i_ps_nn:
 #endif
 	iret
 
+
 #ifdef NDS32_SAVE_ALL_REGS
 #if defined(NDS32_NESTED)
 	.size	_nds32_i_sa_ns, .-_nds32_i_sa_ns
diff --git a/libgcc/config/nds32/isr-library/reset.S b/libgcc/config/nds32/isr-library/reset.S
index 78abeb2127c..2ac247e99fb 100644
--- a/libgcc/config/nds32/isr-library/reset.S
+++ b/libgcc/config/nds32/isr-library/reset.S
@@ -26,22 +26,18 @@
 	.section .nds32_isr, "ax"	/* Put it in the section of 1st level handler.  */
 	.align	1
 	.weak	_SDA_BASE_	/* For reset handler only.  */
-	.weak	_FP_BASE_	/* For reset handler only.  */
 	.weak	_nds32_init_mem	/* User defined memory initialization function.  */
 	.globl	_start
 	.globl	_nds32_reset
 	.type	_nds32_reset, @function
 _nds32_reset:
 _start:
-#ifdef  NDS32_EXT_EX9
-	.no_ex9_begin
-#endif
 	/* Handle NMI and warm boot if any of them exists.  */
 	beqz	$sp, 1f		/* Reset, NMI or warm boot?  */
 	/* Either NMI or warm boot; save all regs.  */
 
 	/* Preserve registers for context-switching.  */
-#ifdef __NDS32_REDUCED_REGS__
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
 	/* For 16-reg mode.  */
 	smw.adm $r0, [$sp], $r10, #0x0
 	smw.adm $r15, [$sp], $r15, #0xf
@@ -49,10 +45,9 @@ _start:
 	/* For 32-reg mode.  */
 	smw.adm $r0, [$sp], $r27, #0xf
 #endif
-#ifdef NDS32_EXT_IFC
+#if __NDS32_EXT_IFC__
 	mfusr   $r1, $IFC_LP
-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
-					   stack 8-byte alignment.  */
+	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep stack 8-byte alignment.  */
 #endif
 
 	la	$gp, _SDA_BASE_	/* Init GP for small data access.  */
@@ -71,12 +66,11 @@ _start:
 	bnez    $r0, 1f		/* If fail to resume, do cold boot.  */
 
 	/* Restore registers for context-switching.  */
-#ifdef NDS32_EXT_IFC
-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
-					   stack 8-byte alignment.  */
+#if __NDS32_EXT_IFC__
+	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep stack 8-byte alignment.  */
 	mtusr   $r1, $IFC_LP
 #endif
-#ifdef __NDS32_REDUCED_REGS__
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
 	/* For 16-reg mode.  */
 	lmw.bim	$r15, [$sp], $r15, #0xf
 	lmw.bim	$r0, [$sp], $r10, #0x0
@@ -88,6 +82,17 @@ _start:
 
 
 1:	/* Cold boot.  */
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* With vector ID feature for v3 architecture, default vector size is 4-byte.  */
+	/* Set IVB.ESZ = 0 (vector table entry size = 4 bytes)  */
+	mfsr    $r0, $IVB
+	li      $r1, #0xc000
+	or      $r0, $r0, $r1
+	xor     $r0, $r0, $r1
+	mtsr    $r0, $IVB
+	dsb
+#else
+	/* There is no vector ID feature, so the vector size must be 16-byte.  */
 	/* Set IVB.ESZ = 1 (vector table entry size = 16 bytes)  */
 	mfsr    $r0, $IVB
 	li	$r1, #0xffff3fff
@@ -95,36 +100,54 @@ _start:
 	ori	$r0, $r0, #0x4000
 	mtsr    $r0, $IVB
 	dsb
+#endif
 
 	la	$gp, _SDA_BASE_		/* Init $gp.  */
-	la	$fp, _FP_BASE_		/* Init $fp.  */
 	la	$sp, _stack		/* Init $sp.  */
-#ifdef  NDS32_EXT_EX9
-/*
- *	Initialize the table base of EX9 instruction
- *	ex9 generation needs to disable before the ITB is set
- */
-	mfsr    $r0, $MSC_CFG	/* Check if HW support of EX9.  */
+
+#if __NDS32_EXT_EX9__
+.L_init_itb:
+	/* Initialization for Instruction Table Base (ITB).
+	   The symbol _ITB_BASE_ is determined by Linker.
+	   Set $ITB only if MSC_CFG.EIT (cr4.b'24) is set.  */
+	mfsr    $r0, $MSC_CFG
 	srli	$r0, $r0, 24
 	andi	$r0, $r0, 0x1
-	beqz	$r0, 4f		/* Zero means HW does not support EX9.  */
-	la      $r0, _ITB_BASE_	/* Init $ITB.  */
+	beqz	$r0, 4f		/* Skip $ITB setup when MSC_CFG.EIT is clear.  */
+	la      $r0, _ITB_BASE_
 	mtusr   $r0, $ITB
-	.no_ex9_end
 4:
 #endif
-	la	$r15, _nds32_init_mem	/* Call DRAM init. _nds32_init_mem
-					  may written by C language.  */
+
+#if __NDS32_EXT_FPU_SP__ || __NDS32_EXT_FPU_DP__
+.L_init_fpu:
+	/* Initialize FPU
+	   Set FUCOP_CTL.CP0EN (fucpr.b'0).  */
+	mfsr    $r0, $FUCOP_CTL
+	ori     $r0, $r0, 0x1
+	mtsr    $r0, $FUCOP_CTL
+	dsb
+	/* According to [bugzilla #9425], set flush-to-zero mode.
+	   That is, set $FPCSR.DNZ(b'12) = 1.  */
+	FMFCSR	$r0
+	ori	$r0, $r0, 0x1000
+	FMTCSR	$r0
+	dsb
+#endif
+
+	/* Call DRAM init.  _nds32_init_mem may be written in C.  */
+	la	$r15, _nds32_init_mem
 	beqz	$r15, 6f
 	jral	$r15
 6:
 	l.w	$r15, _nds32_jmptbl_00	/* Load reset handler.  */
 	jral	$r15
-/* Reset handler() should never return in a RTOS or non-OS system.
-   In case it does return, an exception will be generated.
-   This exception will be caught either by default break handler or by EDM.
-   Default break handle may just do an infinite loop.
-   EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */
+
+	/* Reset handler() should never return in a RTOS or non-OS system.
+	   In case it does return, an exception will be generated.
+	   This exception will be caught either by default break handler or by EDM.
+	   Default break handle may just do an infinite loop.
+	   EDM will notify GDB and GDB will regain control when the ID is 0x7fff.  */
 5:
 	break    #0x7fff
 	.size	_nds32_reset, .-_nds32_reset
diff --git a/libgcc/config/nds32/isr-library/restore_all.inc b/libgcc/config/nds32/isr-library/restore_all.inc
index 74556466fa9..23cdf8c6f16 100644
--- a/libgcc/config/nds32/isr-library/restore_all.inc
+++ b/libgcc/config/nds32/isr-library/restore_all.inc
@@ -31,15 +31,11 @@
 	mtsr	$r2, $IPSW
 	RESTORE_FPU_REGS
 	RESTORE_MAC_REGS
-#ifdef NDS32_EXT_IFC
-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
-					   stack 8-byte alignment.  */
-	mtusr   $r1, $IFC_LP
-#endif
-#ifdef __NDS32_REDUCED_REGS__
+  RESTORE_USR_REGS
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
 	lmw.bim $r0, [$sp], $r10, #0x0	/* Restore all regs.  */
 	lmw.bim $r15, [$sp], $r15, #0xf
-#else /* not __NDS32_REDUCED_REGS__ */
+#else
 	lmw.bim $r0, [$sp], $r27, #0xf	/* Restore all regs.  */
 #endif
 .endm
diff --git a/libgcc/config/nds32/isr-library/restore_mac_regs.inc b/libgcc/config/nds32/isr-library/restore_mac_regs.inc
index 1e6aac669af..a4340833a76 100644
--- a/libgcc/config/nds32/isr-library/restore_mac_regs.inc
+++ b/libgcc/config/nds32/isr-library/restore_mac_regs.inc
@@ -24,7 +24,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 .macro RESTORE_MAC_REGS
-#ifdef NDS32_DX_REGS
+#if __NDS32_DX_REGS__
 	lmw.bim	$r1, [$sp], $r4, #0x0
 	mtusr	$r1, $d0.lo
 	mtusr	$r2, $d0.hi
diff --git a/libgcc/config/nds32/isr-library/restore_partial.inc b/libgcc/config/nds32/isr-library/restore_partial.inc
index d406a99820d..c43ad1600e1 100644
--- a/libgcc/config/nds32/isr-library/restore_partial.inc
+++ b/libgcc/config/nds32/isr-library/restore_partial.inc
@@ -31,15 +31,11 @@
 	mtsr $r1, $IPC	/* Set IPC.  */
 	mtsr $r2, $IPSW	/* Set IPSW.  */
 #endif
-	RESTORE_FPU_REGS
-	RESTORE_MAC_REGS
-#ifdef NDS32_EXT_IFC
-	lmw.bim	$r1, [$sp], $r2, #0x0	/* Restore extra $r2 to keep
-					   stack 8-byte alignment.  */
-	mtusr   $r1, $IFC_LP
-#endif
+  RESTORE_FPU_REGS
+  RESTORE_MAC_REGS
+  RESTORE_USR_REGS
 	lmw.bim $r0, [$sp], $r5, #0x0	/* Restore all regs.  */
-#ifdef __NDS32_REDUCED_REGS__
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
 	lmw.bim $r15, [$sp], $r15, #0x2
 #else
 	lmw.bim $r15, [$sp], $r27, #0x2	/* Restore all regs.  */
diff --git a/libgcc/config/nds32/isr-library/restore_usr_regs.inc b/libgcc/config/nds32/isr-library/restore_usr_regs.inc
new file mode 100644
index 00000000000..9602c741cbd
--- /dev/null
+++ b/libgcc/config/nds32/isr-library/restore_usr_regs.inc
@@ -0,0 +1,42 @@
+/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+.macro RESTORE_USR_REGS
+#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__)
+  lmw.bim $r1, [$sp], $r4, #0x0  /* Pop saved IFC_LP, LB, LE, LC into $r1-$r4.  */
+  mtusr   $r1, $IFC_LP
+  mtusr   $r2, $LB
+  mtusr   $r3, $LE
+  mtusr   $r4, $LC
+#elif __NDS32_EXT_IFC__
+  lmw.bim	$r1, [$sp], $r2, #0x0  /* $r2 is the padding word pushed by SAVE_USR_REGS.  */
+  mtusr   $r1, $IFC_LP
+#elif __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__
+  lmw.bim $r1, [$sp], $r4, #0x0  /* $r4 is the padding word pushed by SAVE_USR_REGS.  */
+  mtusr   $r1, $LB
+  mtusr   $r2, $LE
+  mtusr   $r3, $LC
+#endif
+.endm
diff --git a/libgcc/config/nds32/isr-library/save_all.inc b/libgcc/config/nds32/isr-library/save_all.inc
index fa08b399bb4..8886edb1f64 100644
--- a/libgcc/config/nds32/isr-library/save_all.inc
+++ b/libgcc/config/nds32/isr-library/save_all.inc
@@ -23,45 +23,42 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-.macro SAVE_ALL_4B
-#ifdef __NDS32_REDUCED_REGS__
+#if __NDS32_ISR_VECTOR_SIZE_4__
+
+/* If vector size is 4-byte, we have to save registers
+   in the macro implementation.  */
+.macro SAVE_ALL
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
 	smw.adm $r15, [$sp], $r15, #0xf
 	smw.adm $r0, [$sp], $r10, #0x0
-#else /* not __NDS32_REDUCED_REGS__ */
+#else
 	smw.adm $r0, [$sp], $r27, #0xf
-#endif /* not __NDS32_REDUCED_REGS__ */
-#ifdef NDS32_EXT_IFC
-	mfusr   $r1, $IFC_LP
-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
-					   stack 8-byte alignment.  */
 #endif
-	SAVE_MAC_REGS
-	SAVE_FPU_REGS
+  SAVE_USR_REGS
+  SAVE_MAC_REGS
+  SAVE_FPU_REGS
 	mfsr	$r1, $IPC	/* Get IPC.  */
 	mfsr	$r2, $IPSW	/* Get IPSW.  */
 	smw.adm	$r1, [$sp], $r2, #0x0	/* Push IPC, IPSW.  */
 	move	$r1, $sp	/* $r1 is ptr to NDS32_CONTEXT.  */
 	mfsr	$r0, $ITYPE	/* Get VID to $r0.  */
 	srli	$r0, $r0, #5
-#ifdef __NDS32_ISA_V2__
 	andi	$r0, $r0, #127
-#else
-	fexti33	$r0, #6
-#endif
 .endm
 
+#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */
+
+/* If the vector size is 16-byte, some of the work is done in
+   the vector section generated by the compiler, so that we
+   can implement less in the macro.  */
 .macro SAVE_ALL
-/* SAVE_REG_TBL code has been moved to
-   vector table generated by compiler.  */
-#ifdef NDS32_EXT_IFC
-	mfusr   $r1, $IFC_LP
-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
-					   stack 8-byte alignment.  */
-#endif
-	SAVE_MAC_REGS
-	SAVE_FPU_REGS
+  SAVE_USR_REGS
+  SAVE_MAC_REGS
+  SAVE_FPU_REGS
 	mfsr	$r1, $IPC	/* Get IPC.  */
 	mfsr	$r2, $IPSW	/* Get IPSW.  */
 	smw.adm	$r1, [$sp], $r2, #0x0	/* Push IPC, IPSW.  */
 	move	$r1, $sp	/* $r1 is ptr to NDS32_CONTEXT.  */
 .endm
+
+#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */
diff --git a/libgcc/config/nds32/isr-library/save_mac_regs.inc b/libgcc/config/nds32/isr-library/save_mac_regs.inc
index ff120e87a8f..a6a92307fef 100644
--- a/libgcc/config/nds32/isr-library/save_mac_regs.inc
+++ b/libgcc/config/nds32/isr-library/save_mac_regs.inc
@@ -24,7 +24,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 .macro SAVE_MAC_REGS
-#ifdef NDS32_DX_REGS
+#if __NDS32_DX_REGS__
 	mfusr	$r1, $d0.lo
 	mfusr	$r2, $d0.hi
 	mfusr	$r3, $d1.lo
diff --git a/libgcc/config/nds32/isr-library/save_partial.inc b/libgcc/config/nds32/isr-library/save_partial.inc
index 2445e48067e..c81ebaa693c 100644
--- a/libgcc/config/nds32/isr-library/save_partial.inc
+++ b/libgcc/config/nds32/isr-library/save_partial.inc
@@ -23,20 +23,20 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-.macro SAVE_PARTIAL_4B
-#ifdef __NDS32_REDUCED_REGS__
+#if __NDS32_ISR_VECTOR_SIZE_4__
+
+/* If vector size is 4-byte, we have to save registers
+   in the macro implementation.  */
+.macro SAVE_PARTIAL
+#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS
 	smw.adm $r15, [$sp], $r15, #0x2
-#else /* not __NDS32_REDUCED_REGS__ */
+#else
 	smw.adm $r15, [$sp], $r27, #0x2
-#endif /* not __NDS32_REDUCED_REGS__ */
-	smw.adm $r0, [$sp], $r5, #0x0
-#ifdef NDS32_EXT_IFC
-	mfusr   $r1, $IFC_LP
-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
-					   stack 8-byte alignment.  */
 #endif
-	SAVE_MAC_REGS
-	SAVE_FPU_REGS
+	smw.adm $r0, [$sp], $r5, #0x0
+  SAVE_USR_REGS
+  SAVE_MAC_REGS
+  SAVE_FPU_REGS
 #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY)
        mfsr    $r1, $IPC       /* Get IPC.  */
        mfsr    $r2, $IPSW      /* Get IPSW.  */
@@ -44,26 +44,24 @@
 #endif
 	mfsr	$r0, $ITYPE	/* Get VID to $r0.  */
 	srli	$r0, $r0, #5
-#ifdef __NDS32_ISA_V2__
 	andi	$r0, $r0, #127
-#else
-	fexti33	$r0, #6
-#endif
 .endm
 
+#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */
+
+/* If the vector size is 16-byte, some of the work is done in
+   the vector section generated by the compiler, so that we
+   can implement less in the macro.  */
+
 .macro SAVE_PARTIAL
-/* SAVE_CALLER_REGS code has been moved to
-   vector table generated by compiler.  */
-#ifdef NDS32_EXT_IFC
-	mfusr   $r1, $IFC_LP
-	smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep
-					   stack 8-byte alignment.  */
-#endif
-	SAVE_MAC_REGS
-	SAVE_FPU_REGS
+  SAVE_USR_REGS
+  SAVE_MAC_REGS
+  SAVE_FPU_REGS
 #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY)
        mfsr    $r1, $IPC       /* Get IPC.  */
        mfsr    $r2, $IPSW      /* Get IPSW.  */
        smw.adm $r1, [$sp], $r2, #0x0   /* Push IPC, IPSW.  */
 #endif
 .endm
+
+#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */
diff --git a/libgcc/config/nds32/isr-library/save_usr_regs.inc b/libgcc/config/nds32/isr-library/save_usr_regs.inc
new file mode 100644
index 00000000000..5a3f6183b68
--- /dev/null
+++ b/libgcc/config/nds32/isr-library/save_usr_regs.inc
@@ -0,0 +1,44 @@
+/* c-isr library stuff of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+.macro SAVE_USR_REGS
+/* Push the user special registers selected by the enabled ISA extensions.
+   Every branch pushes an even number of registers to keep $sp 8-byte aligned.  */
+#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__)
+  mfusr   $r1, $IFC_LP
+  mfusr   $r2, $LB
+  mfusr   $r3, $LE
+  mfusr   $r4, $LC
+  smw.adm $r1, [$sp], $r4, #0x0 /* Four registers: already an even count.  */
+#elif __NDS32_EXT_IFC__
+  mfusr   $r1, $IFC_LP
+  smw.adm $r1, [$sp], $r2, #0x0	/* Save extra $r2 to keep stack 8-byte aligned.  */
+#elif (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__)
+  mfusr   $r1, $LB
+  mfusr   $r2, $LE
+  mfusr   $r3, $LC
+  smw.adm $r1, [$sp], $r4, #0x0	/* Save extra $r4 to keep stack 8-byte aligned.  */
+#endif
+.endm
diff --git a/libgcc/config/nds32/isr-library/vec_vid00.S b/libgcc/config/nds32/isr-library/vec_vid00.S
index b2a645c53f0..643009eb800 100644
--- a/libgcc/config/nds32/isr-library/vec_vid00.S
+++ b/libgcc/config/nds32/isr-library/vec_vid00.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.00, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_00
 	.type	_nds32_vector_00, @function
 _nds32_vector_00:
diff --git a/libgcc/config/nds32/isr-library/vec_vid01.S b/libgcc/config/nds32/isr-library/vec_vid01.S
index 9e796c70524..fd9bc8b6850 100644
--- a/libgcc/config/nds32/isr-library/vec_vid01.S
+++ b/libgcc/config/nds32/isr-library/vec_vid01.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.01, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_01
 	.type	_nds32_vector_01, @function
 _nds32_vector_01:
diff --git a/libgcc/config/nds32/isr-library/vec_vid02.S b/libgcc/config/nds32/isr-library/vec_vid02.S
index a6b34b7d63a..c5a88435cab 100644
--- a/libgcc/config/nds32/isr-library/vec_vid02.S
+++ b/libgcc/config/nds32/isr-library/vec_vid02.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.02, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_02
 	.type	_nds32_vector_02, @function
 _nds32_vector_02:
diff --git a/libgcc/config/nds32/isr-library/vec_vid03.S b/libgcc/config/nds32/isr-library/vec_vid03.S
index 680f6d9a60f..7f11fb9166b 100644
--- a/libgcc/config/nds32/isr-library/vec_vid03.S
+++ b/libgcc/config/nds32/isr-library/vec_vid03.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.03, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_03
 	.type	_nds32_vector_03, @function
 _nds32_vector_03:
diff --git a/libgcc/config/nds32/isr-library/vec_vid04.S b/libgcc/config/nds32/isr-library/vec_vid04.S
index f0b616ceb8a..de2e249b78f 100644
--- a/libgcc/config/nds32/isr-library/vec_vid04.S
+++ b/libgcc/config/nds32/isr-library/vec_vid04.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.04, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_04
 	.type	_nds32_vector_04, @function
 _nds32_vector_04:
diff --git a/libgcc/config/nds32/isr-library/vec_vid05.S b/libgcc/config/nds32/isr-library/vec_vid05.S
index 47cbcea0a51..62e1cdac4a3 100644
--- a/libgcc/config/nds32/isr-library/vec_vid05.S
+++ b/libgcc/config/nds32/isr-library/vec_vid05.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.05, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_05
 	.type	_nds32_vector_05, @function
 _nds32_vector_05:
diff --git a/libgcc/config/nds32/isr-library/vec_vid06.S b/libgcc/config/nds32/isr-library/vec_vid06.S
index 851836cf9ea..e41a60c4db4 100644
--- a/libgcc/config/nds32/isr-library/vec_vid06.S
+++ b/libgcc/config/nds32/isr-library/vec_vid06.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.06, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_06
 	.type	_nds32_vector_06, @function
 _nds32_vector_06:
diff --git a/libgcc/config/nds32/isr-library/vec_vid07.S b/libgcc/config/nds32/isr-library/vec_vid07.S
index 664ee0ca7b0..b5447a85045 100644
--- a/libgcc/config/nds32/isr-library/vec_vid07.S
+++ b/libgcc/config/nds32/isr-library/vec_vid07.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.07, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_07
 	.type	_nds32_vector_07, @function
 _nds32_vector_07:
diff --git a/libgcc/config/nds32/isr-library/vec_vid08.S b/libgcc/config/nds32/isr-library/vec_vid08.S
index 1b5534c3475..2c07dd35416 100644
--- a/libgcc/config/nds32/isr-library/vec_vid08.S
+++ b/libgcc/config/nds32/isr-library/vec_vid08.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.08, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_08
 	.type	_nds32_vector_08, @function
 _nds32_vector_08:
diff --git a/libgcc/config/nds32/isr-library/vec_vid09.S b/libgcc/config/nds32/isr-library/vec_vid09.S
index 81a56753202..e858cea5f11 100644
--- a/libgcc/config/nds32/isr-library/vec_vid09.S
+++ b/libgcc/config/nds32/isr-library/vec_vid09.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.09, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_09
 	.type	_nds32_vector_09, @function
 _nds32_vector_09:
diff --git a/libgcc/config/nds32/isr-library/vec_vid10.S b/libgcc/config/nds32/isr-library/vec_vid10.S
index 102f7cf2ae6..e8bbc0b6a2c 100644
--- a/libgcc/config/nds32/isr-library/vec_vid10.S
+++ b/libgcc/config/nds32/isr-library/vec_vid10.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.10, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_10
 	.type	_nds32_vector_10, @function
 _nds32_vector_10:
diff --git a/libgcc/config/nds32/isr-library/vec_vid11.S b/libgcc/config/nds32/isr-library/vec_vid11.S
index ade2ee5190c..92aebb41022 100644
--- a/libgcc/config/nds32/isr-library/vec_vid11.S
+++ b/libgcc/config/nds32/isr-library/vec_vid11.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.11, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_11
 	.type	_nds32_vector_11, @function
 _nds32_vector_11:
diff --git a/libgcc/config/nds32/isr-library/vec_vid12.S b/libgcc/config/nds32/isr-library/vec_vid12.S
index a5958111946..6fd050afd40 100644
--- a/libgcc/config/nds32/isr-library/vec_vid12.S
+++ b/libgcc/config/nds32/isr-library/vec_vid12.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.12, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_12
 	.type	_nds32_vector_12, @function
 _nds32_vector_12:
diff --git a/libgcc/config/nds32/isr-library/vec_vid13.S b/libgcc/config/nds32/isr-library/vec_vid13.S
index 55863be5e72..0a45c456b24 100644
--- a/libgcc/config/nds32/isr-library/vec_vid13.S
+++ b/libgcc/config/nds32/isr-library/vec_vid13.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.13, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_13
 	.type	_nds32_vector_13, @function
 _nds32_vector_13:
diff --git a/libgcc/config/nds32/isr-library/vec_vid14.S b/libgcc/config/nds32/isr-library/vec_vid14.S
index abe7f42d1df..837b8487606 100644
--- a/libgcc/config/nds32/isr-library/vec_vid14.S
+++ b/libgcc/config/nds32/isr-library/vec_vid14.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.14, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_14
 	.type	_nds32_vector_14, @function
 _nds32_vector_14:
diff --git a/libgcc/config/nds32/isr-library/vec_vid15.S b/libgcc/config/nds32/isr-library/vec_vid15.S
index 890819f3ec2..c639aa444ba 100644
--- a/libgcc/config/nds32/isr-library/vec_vid15.S
+++ b/libgcc/config/nds32/isr-library/vec_vid15.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.15, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_15
 	.type	_nds32_vector_15, @function
 _nds32_vector_15:
diff --git a/libgcc/config/nds32/isr-library/vec_vid16.S b/libgcc/config/nds32/isr-library/vec_vid16.S
index 20db62501ba..a762130631c 100644
--- a/libgcc/config/nds32/isr-library/vec_vid16.S
+++ b/libgcc/config/nds32/isr-library/vec_vid16.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.16, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_16
 	.type	_nds32_vector_16, @function
 _nds32_vector_16:
diff --git a/libgcc/config/nds32/isr-library/vec_vid17.S b/libgcc/config/nds32/isr-library/vec_vid17.S
index c1ca9f62353..b17681fcb96 100644
--- a/libgcc/config/nds32/isr-library/vec_vid17.S
+++ b/libgcc/config/nds32/isr-library/vec_vid17.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.17, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_17
 	.type	_nds32_vector_17, @function
 _nds32_vector_17:
diff --git a/libgcc/config/nds32/isr-library/vec_vid18.S b/libgcc/config/nds32/isr-library/vec_vid18.S
index ef4cbeec2e6..4166fa1957f 100644
--- a/libgcc/config/nds32/isr-library/vec_vid18.S
+++ b/libgcc/config/nds32/isr-library/vec_vid18.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.18, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_18
 	.type	_nds32_vector_18, @function
 _nds32_vector_18:
diff --git a/libgcc/config/nds32/isr-library/vec_vid19.S b/libgcc/config/nds32/isr-library/vec_vid19.S
index 5efab98f379..0d7d1de38c7 100644
--- a/libgcc/config/nds32/isr-library/vec_vid19.S
+++ b/libgcc/config/nds32/isr-library/vec_vid19.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.19, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_19
 	.type	_nds32_vector_19, @function
 _nds32_vector_19:
diff --git a/libgcc/config/nds32/isr-library/vec_vid20.S b/libgcc/config/nds32/isr-library/vec_vid20.S
index 95e124700c3..d39d74b9ad6 100644
--- a/libgcc/config/nds32/isr-library/vec_vid20.S
+++ b/libgcc/config/nds32/isr-library/vec_vid20.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.20, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_20
 	.type	_nds32_vector_20, @function
 _nds32_vector_20:
diff --git a/libgcc/config/nds32/isr-library/vec_vid21.S b/libgcc/config/nds32/isr-library/vec_vid21.S
index f3f401e25a0..deff0cf9ea9 100644
--- a/libgcc/config/nds32/isr-library/vec_vid21.S
+++ b/libgcc/config/nds32/isr-library/vec_vid21.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.21, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_21
 	.type	_nds32_vector_21, @function
 _nds32_vector_21:
diff --git a/libgcc/config/nds32/isr-library/vec_vid22.S b/libgcc/config/nds32/isr-library/vec_vid22.S
index 28d0d99795f..ebd3891af71 100644
--- a/libgcc/config/nds32/isr-library/vec_vid22.S
+++ b/libgcc/config/nds32/isr-library/vec_vid22.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.22, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_22
 	.type	_nds32_vector_22, @function
 _nds32_vector_22:
diff --git a/libgcc/config/nds32/isr-library/vec_vid23.S b/libgcc/config/nds32/isr-library/vec_vid23.S
index a8246298fed..90562e77bad 100644
--- a/libgcc/config/nds32/isr-library/vec_vid23.S
+++ b/libgcc/config/nds32/isr-library/vec_vid23.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.23, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_23
 	.type	_nds32_vector_23, @function
 _nds32_vector_23:
diff --git a/libgcc/config/nds32/isr-library/vec_vid24.S b/libgcc/config/nds32/isr-library/vec_vid24.S
index 2c0e2d81c8c..7bd344c6c26 100644
--- a/libgcc/config/nds32/isr-library/vec_vid24.S
+++ b/libgcc/config/nds32/isr-library/vec_vid24.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.24, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_24
 	.type	_nds32_vector_24, @function
 _nds32_vector_24:
diff --git a/libgcc/config/nds32/isr-library/vec_vid25.S b/libgcc/config/nds32/isr-library/vec_vid25.S
index 56f78863cef..245db6e67b0 100644
--- a/libgcc/config/nds32/isr-library/vec_vid25.S
+++ b/libgcc/config/nds32/isr-library/vec_vid25.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.25, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_25
 	.type	_nds32_vector_25, @function
 _nds32_vector_25:
diff --git a/libgcc/config/nds32/isr-library/vec_vid26.S b/libgcc/config/nds32/isr-library/vec_vid26.S
index b02163ead68..4df61ff52e4 100644
--- a/libgcc/config/nds32/isr-library/vec_vid26.S
+++ b/libgcc/config/nds32/isr-library/vec_vid26.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.26, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_26
 	.type	_nds32_vector_26, @function
 _nds32_vector_26:
diff --git a/libgcc/config/nds32/isr-library/vec_vid27.S b/libgcc/config/nds32/isr-library/vec_vid27.S
index 276d1f0b49e..50960dbd12c 100644
--- a/libgcc/config/nds32/isr-library/vec_vid27.S
+++ b/libgcc/config/nds32/isr-library/vec_vid27.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.27, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_27
 	.type	_nds32_vector_27, @function
 _nds32_vector_27:
diff --git a/libgcc/config/nds32/isr-library/vec_vid28.S b/libgcc/config/nds32/isr-library/vec_vid28.S
index 59e8cc2c4ea..e44adbb58af 100644
--- a/libgcc/config/nds32/isr-library/vec_vid28.S
+++ b/libgcc/config/nds32/isr-library/vec_vid28.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.28, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_28
 	.type	_nds32_vector_28, @function
 _nds32_vector_28:
diff --git a/libgcc/config/nds32/isr-library/vec_vid29.S b/libgcc/config/nds32/isr-library/vec_vid29.S
index 7119e254afc..f7e6c770e2b 100644
--- a/libgcc/config/nds32/isr-library/vec_vid29.S
+++ b/libgcc/config/nds32/isr-library/vec_vid29.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.29, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_29
 	.type	_nds32_vector_29, @function
 _nds32_vector_29:
diff --git a/libgcc/config/nds32/isr-library/vec_vid30.S b/libgcc/config/nds32/isr-library/vec_vid30.S
index 7c7bd5fd191..7fac25da175 100644
--- a/libgcc/config/nds32/isr-library/vec_vid30.S
+++ b/libgcc/config/nds32/isr-library/vec_vid30.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.30, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_30
 	.type	_nds32_vector_30, @function
 _nds32_vector_30:
diff --git a/libgcc/config/nds32/isr-library/vec_vid31.S b/libgcc/config/nds32/isr-library/vec_vid31.S
index bd29e03c4b8..5857765d22e 100644
--- a/libgcc/config/nds32/isr-library/vec_vid31.S
+++ b/libgcc/config/nds32/isr-library/vec_vid31.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.31, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_31
 	.type	_nds32_vector_31, @function
 _nds32_vector_31:
diff --git a/libgcc/config/nds32/isr-library/vec_vid32.S b/libgcc/config/nds32/isr-library/vec_vid32.S
index 57b8db0bbe4..bcd5dbf88c8 100644
--- a/libgcc/config/nds32/isr-library/vec_vid32.S
+++ b/libgcc/config/nds32/isr-library/vec_vid32.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.32, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_32
 	.type	_nds32_vector_32, @function
 _nds32_vector_32:
diff --git a/libgcc/config/nds32/isr-library/vec_vid33.S b/libgcc/config/nds32/isr-library/vec_vid33.S
index 609735e731d..abfed4eaf7a 100644
--- a/libgcc/config/nds32/isr-library/vec_vid33.S
+++ b/libgcc/config/nds32/isr-library/vec_vid33.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.33, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_33
 	.type	_nds32_vector_33, @function
 _nds32_vector_33:
diff --git a/libgcc/config/nds32/isr-library/vec_vid34.S b/libgcc/config/nds32/isr-library/vec_vid34.S
index 2a91328fb11..f9446bb1b07 100644
--- a/libgcc/config/nds32/isr-library/vec_vid34.S
+++ b/libgcc/config/nds32/isr-library/vec_vid34.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.34, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_34
 	.type	_nds32_vector_34, @function
 _nds32_vector_34:
diff --git a/libgcc/config/nds32/isr-library/vec_vid35.S b/libgcc/config/nds32/isr-library/vec_vid35.S
index 65dd081d7b3..8862137b38f 100644
--- a/libgcc/config/nds32/isr-library/vec_vid35.S
+++ b/libgcc/config/nds32/isr-library/vec_vid35.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.35, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_35
 	.type	_nds32_vector_35, @function
 _nds32_vector_35:
diff --git a/libgcc/config/nds32/isr-library/vec_vid36.S b/libgcc/config/nds32/isr-library/vec_vid36.S
index fa47b8e879c..dbcbbf4298f 100644
--- a/libgcc/config/nds32/isr-library/vec_vid36.S
+++ b/libgcc/config/nds32/isr-library/vec_vid36.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.36, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_36
 	.type	_nds32_vector_36, @function
 _nds32_vector_36:
diff --git a/libgcc/config/nds32/isr-library/vec_vid37.S b/libgcc/config/nds32/isr-library/vec_vid37.S
index ece845633f2..392f18bfe05 100644
--- a/libgcc/config/nds32/isr-library/vec_vid37.S
+++ b/libgcc/config/nds32/isr-library/vec_vid37.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.37, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_37
 	.type	_nds32_vector_37, @function
 _nds32_vector_37:
diff --git a/libgcc/config/nds32/isr-library/vec_vid38.S b/libgcc/config/nds32/isr-library/vec_vid38.S
index c4a12f574ef..efe6619b3a7 100644
--- a/libgcc/config/nds32/isr-library/vec_vid38.S
+++ b/libgcc/config/nds32/isr-library/vec_vid38.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.38, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_38
 	.type	_nds32_vector_38, @function
 _nds32_vector_38:
diff --git a/libgcc/config/nds32/isr-library/vec_vid39.S b/libgcc/config/nds32/isr-library/vec_vid39.S
index b3e56ed7077..238c43aec88 100644
--- a/libgcc/config/nds32/isr-library/vec_vid39.S
+++ b/libgcc/config/nds32/isr-library/vec_vid39.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.39, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_39
 	.type	_nds32_vector_39, @function
 _nds32_vector_39:
diff --git a/libgcc/config/nds32/isr-library/vec_vid40.S b/libgcc/config/nds32/isr-library/vec_vid40.S
index 01364aa4909..cf3eaa21fa6 100644
--- a/libgcc/config/nds32/isr-library/vec_vid40.S
+++ b/libgcc/config/nds32/isr-library/vec_vid40.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.40, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_40
 	.type	_nds32_vector_40, @function
 _nds32_vector_40:
diff --git a/libgcc/config/nds32/isr-library/vec_vid41.S b/libgcc/config/nds32/isr-library/vec_vid41.S
index f20beec98c0..27b7aac3dbb 100644
--- a/libgcc/config/nds32/isr-library/vec_vid41.S
+++ b/libgcc/config/nds32/isr-library/vec_vid41.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.41, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_41
 	.type	_nds32_vector_41, @function
 _nds32_vector_41:
diff --git a/libgcc/config/nds32/isr-library/vec_vid42.S b/libgcc/config/nds32/isr-library/vec_vid42.S
index 6c29f1ff5a4..bfeed46e263 100644
--- a/libgcc/config/nds32/isr-library/vec_vid42.S
+++ b/libgcc/config/nds32/isr-library/vec_vid42.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.42, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_42
 	.type	_nds32_vector_42, @function
 _nds32_vector_42:
diff --git a/libgcc/config/nds32/isr-library/vec_vid43.S b/libgcc/config/nds32/isr-library/vec_vid43.S
index 8767f998513..54640c9b4f7 100644
--- a/libgcc/config/nds32/isr-library/vec_vid43.S
+++ b/libgcc/config/nds32/isr-library/vec_vid43.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.43, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_43
 	.type	_nds32_vector_43, @function
 _nds32_vector_43:
diff --git a/libgcc/config/nds32/isr-library/vec_vid44.S b/libgcc/config/nds32/isr-library/vec_vid44.S
index 8b6f53db5a8..f617243c473 100644
--- a/libgcc/config/nds32/isr-library/vec_vid44.S
+++ b/libgcc/config/nds32/isr-library/vec_vid44.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.44, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_44
 	.type	_nds32_vector_44, @function
 _nds32_vector_44:
diff --git a/libgcc/config/nds32/isr-library/vec_vid45.S b/libgcc/config/nds32/isr-library/vec_vid45.S
index 52e344b0de4..2cfeb785b1b 100644
--- a/libgcc/config/nds32/isr-library/vec_vid45.S
+++ b/libgcc/config/nds32/isr-library/vec_vid45.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.45, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_45
 	.type	_nds32_vector_45, @function
 _nds32_vector_45:
diff --git a/libgcc/config/nds32/isr-library/vec_vid46.S b/libgcc/config/nds32/isr-library/vec_vid46.S
index f9dc0d11382..45c88477ee9 100644
--- a/libgcc/config/nds32/isr-library/vec_vid46.S
+++ b/libgcc/config/nds32/isr-library/vec_vid46.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.46, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_46
 	.type	_nds32_vector_46, @function
 _nds32_vector_46:
diff --git a/libgcc/config/nds32/isr-library/vec_vid47.S b/libgcc/config/nds32/isr-library/vec_vid47.S
index 436e7e3a977..25469e456fd 100644
--- a/libgcc/config/nds32/isr-library/vec_vid47.S
+++ b/libgcc/config/nds32/isr-library/vec_vid47.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.47, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_47
 	.type	_nds32_vector_47, @function
 _nds32_vector_47:
diff --git a/libgcc/config/nds32/isr-library/vec_vid48.S b/libgcc/config/nds32/isr-library/vec_vid48.S
index 219dfd49b19..5a001194edd 100644
--- a/libgcc/config/nds32/isr-library/vec_vid48.S
+++ b/libgcc/config/nds32/isr-library/vec_vid48.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.48, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_48
 	.type	_nds32_vector_48, @function
 _nds32_vector_48:
diff --git a/libgcc/config/nds32/isr-library/vec_vid49.S b/libgcc/config/nds32/isr-library/vec_vid49.S
index e3ba7537f08..dfe11f14017 100644
--- a/libgcc/config/nds32/isr-library/vec_vid49.S
+++ b/libgcc/config/nds32/isr-library/vec_vid49.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.49, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_49
 	.type	_nds32_vector_49, @function
 _nds32_vector_49:
diff --git a/libgcc/config/nds32/isr-library/vec_vid50.S b/libgcc/config/nds32/isr-library/vec_vid50.S
index b0b3fc2b73f..0dacd26315d 100644
--- a/libgcc/config/nds32/isr-library/vec_vid50.S
+++ b/libgcc/config/nds32/isr-library/vec_vid50.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.50, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_50
 	.type	_nds32_vector_50, @function
 _nds32_vector_50:
diff --git a/libgcc/config/nds32/isr-library/vec_vid51.S b/libgcc/config/nds32/isr-library/vec_vid51.S
index bf3011d5ccb..5ab28ef7238 100644
--- a/libgcc/config/nds32/isr-library/vec_vid51.S
+++ b/libgcc/config/nds32/isr-library/vec_vid51.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.51, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_51
 	.type	_nds32_vector_51, @function
 _nds32_vector_51:
diff --git a/libgcc/config/nds32/isr-library/vec_vid52.S b/libgcc/config/nds32/isr-library/vec_vid52.S
index eaf5f14ef25..ed00f4000d1 100644
--- a/libgcc/config/nds32/isr-library/vec_vid52.S
+++ b/libgcc/config/nds32/isr-library/vec_vid52.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.52, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_52
 	.type	_nds32_vector_52, @function
 _nds32_vector_52:
diff --git a/libgcc/config/nds32/isr-library/vec_vid53.S b/libgcc/config/nds32/isr-library/vec_vid53.S
index 3f92e56d665..564cadbf1d4 100644
--- a/libgcc/config/nds32/isr-library/vec_vid53.S
+++ b/libgcc/config/nds32/isr-library/vec_vid53.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.53, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_53
 	.type	_nds32_vector_53, @function
 _nds32_vector_53:
diff --git a/libgcc/config/nds32/isr-library/vec_vid54.S b/libgcc/config/nds32/isr-library/vec_vid54.S
index f22793fe3f2..377c524361e 100644
--- a/libgcc/config/nds32/isr-library/vec_vid54.S
+++ b/libgcc/config/nds32/isr-library/vec_vid54.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.54, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_54
 	.type	_nds32_vector_54, @function
 _nds32_vector_54:
diff --git a/libgcc/config/nds32/isr-library/vec_vid55.S b/libgcc/config/nds32/isr-library/vec_vid55.S
index 1017130a9da..497252ada22 100644
--- a/libgcc/config/nds32/isr-library/vec_vid55.S
+++ b/libgcc/config/nds32/isr-library/vec_vid55.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.55, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_55
 	.type	_nds32_vector_55, @function
 _nds32_vector_55:
diff --git a/libgcc/config/nds32/isr-library/vec_vid56.S b/libgcc/config/nds32/isr-library/vec_vid56.S
index a0923e9e791..b62534b9cbc 100644
--- a/libgcc/config/nds32/isr-library/vec_vid56.S
+++ b/libgcc/config/nds32/isr-library/vec_vid56.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.56, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_56
 	.type	_nds32_vector_56, @function
 _nds32_vector_56:
diff --git a/libgcc/config/nds32/isr-library/vec_vid57.S b/libgcc/config/nds32/isr-library/vec_vid57.S
index e711b890ef4..b1bb42d9c03 100644
--- a/libgcc/config/nds32/isr-library/vec_vid57.S
+++ b/libgcc/config/nds32/isr-library/vec_vid57.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.57, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_57
 	.type	_nds32_vector_57, @function
 _nds32_vector_57:
diff --git a/libgcc/config/nds32/isr-library/vec_vid58.S b/libgcc/config/nds32/isr-library/vec_vid58.S
index f8d90643af1..14595a527a9 100644
--- a/libgcc/config/nds32/isr-library/vec_vid58.S
+++ b/libgcc/config/nds32/isr-library/vec_vid58.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.58, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_58
 	.type	_nds32_vector_58, @function
 _nds32_vector_58:
diff --git a/libgcc/config/nds32/isr-library/vec_vid59.S b/libgcc/config/nds32/isr-library/vec_vid59.S
index 58fb6e626e3..e5be1772425 100644
--- a/libgcc/config/nds32/isr-library/vec_vid59.S
+++ b/libgcc/config/nds32/isr-library/vec_vid59.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.59, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_59
 	.type	_nds32_vector_59, @function
 _nds32_vector_59:
diff --git a/libgcc/config/nds32/isr-library/vec_vid60.S b/libgcc/config/nds32/isr-library/vec_vid60.S
index 94aa6e0ef7a..f6df9712907 100644
--- a/libgcc/config/nds32/isr-library/vec_vid60.S
+++ b/libgcc/config/nds32/isr-library/vec_vid60.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.60, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_60
 	.type	_nds32_vector_60, @function
 _nds32_vector_60:
diff --git a/libgcc/config/nds32/isr-library/vec_vid61.S b/libgcc/config/nds32/isr-library/vec_vid61.S
index 869f6c86514..4f97b043154 100644
--- a/libgcc/config/nds32/isr-library/vec_vid61.S
+++ b/libgcc/config/nds32/isr-library/vec_vid61.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.61, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_61
 	.type	_nds32_vector_61, @function
 _nds32_vector_61:
diff --git a/libgcc/config/nds32/isr-library/vec_vid62.S b/libgcc/config/nds32/isr-library/vec_vid62.S
index acc846c320b..08d1bbb2567 100644
--- a/libgcc/config/nds32/isr-library/vec_vid62.S
+++ b/libgcc/config/nds32/isr-library/vec_vid62.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.62, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_62
 	.type	_nds32_vector_62, @function
 _nds32_vector_62:
diff --git a/libgcc/config/nds32/isr-library/vec_vid63.S b/libgcc/config/nds32/isr-library/vec_vid63.S
index d0727ecdd08..2b2068c4fb5 100644
--- a/libgcc/config/nds32/isr-library/vec_vid63.S
+++ b/libgcc/config/nds32/isr-library/vec_vid63.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.63, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_63
 	.type	_nds32_vector_63, @function
 _nds32_vector_63:
diff --git a/libgcc/config/nds32/isr-library/vec_vid64.S b/libgcc/config/nds32/isr-library/vec_vid64.S
index cb1659ad3ee..2c06ea0cc90 100644
--- a/libgcc/config/nds32/isr-library/vec_vid64.S
+++ b/libgcc/config/nds32/isr-library/vec_vid64.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.64, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_64
 	.type	_nds32_vector_64, @function
 _nds32_vector_64:
diff --git a/libgcc/config/nds32/isr-library/vec_vid65.S b/libgcc/config/nds32/isr-library/vec_vid65.S
index da46481ec02..d2359fd6b2b 100644
--- a/libgcc/config/nds32/isr-library/vec_vid65.S
+++ b/libgcc/config/nds32/isr-library/vec_vid65.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.65, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_65
 	.type	_nds32_vector_65, @function
 _nds32_vector_65:
diff --git a/libgcc/config/nds32/isr-library/vec_vid66.S b/libgcc/config/nds32/isr-library/vec_vid66.S
index a8c18b804b3..69ccf368f6d 100644
--- a/libgcc/config/nds32/isr-library/vec_vid66.S
+++ b/libgcc/config/nds32/isr-library/vec_vid66.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.66, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_66
 	.type	_nds32_vector_66, @function
 _nds32_vector_66:
diff --git a/libgcc/config/nds32/isr-library/vec_vid67.S b/libgcc/config/nds32/isr-library/vec_vid67.S
index d2996a375ee..78a68cb89a9 100644
--- a/libgcc/config/nds32/isr-library/vec_vid67.S
+++ b/libgcc/config/nds32/isr-library/vec_vid67.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.67, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_67
 	.type	_nds32_vector_67, @function
 _nds32_vector_67:
diff --git a/libgcc/config/nds32/isr-library/vec_vid68.S b/libgcc/config/nds32/isr-library/vec_vid68.S
index 0c9de86b1d7..a120ec34377 100644
--- a/libgcc/config/nds32/isr-library/vec_vid68.S
+++ b/libgcc/config/nds32/isr-library/vec_vid68.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.68, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_68
 	.type	_nds32_vector_68, @function
 _nds32_vector_68:
diff --git a/libgcc/config/nds32/isr-library/vec_vid69.S b/libgcc/config/nds32/isr-library/vec_vid69.S
index 43cf748d442..e2bdd5f0442 100644
--- a/libgcc/config/nds32/isr-library/vec_vid69.S
+++ b/libgcc/config/nds32/isr-library/vec_vid69.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.69, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_69
 	.type	_nds32_vector_69, @function
 _nds32_vector_69:
diff --git a/libgcc/config/nds32/isr-library/vec_vid70.S b/libgcc/config/nds32/isr-library/vec_vid70.S
index aba3e6aede0..a5ac1f306ff 100644
--- a/libgcc/config/nds32/isr-library/vec_vid70.S
+++ b/libgcc/config/nds32/isr-library/vec_vid70.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.70, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_70
 	.type	_nds32_vector_70, @function
 _nds32_vector_70:
diff --git a/libgcc/config/nds32/isr-library/vec_vid71.S b/libgcc/config/nds32/isr-library/vec_vid71.S
index be8aaa52534..06ed89c633a 100644
--- a/libgcc/config/nds32/isr-library/vec_vid71.S
+++ b/libgcc/config/nds32/isr-library/vec_vid71.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.71, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_71
 	.type	_nds32_vector_71, @function
 _nds32_vector_71:
diff --git a/libgcc/config/nds32/isr-library/vec_vid72.S b/libgcc/config/nds32/isr-library/vec_vid72.S
index 041c89517e3..2163201b620 100644
--- a/libgcc/config/nds32/isr-library/vec_vid72.S
+++ b/libgcc/config/nds32/isr-library/vec_vid72.S
@@ -24,8 +24,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 	.section	.nds32_vector.72, "ax"
+#if __NDS32_ISR_VECTOR_SIZE_4__
+	/* The vector size is default 4-byte for v3 architecture.  */
+	.vec_size	4
+	.align	2
+#else
+	/* The vector size is default 16-byte for other architectures.  */
 	.vec_size	16
 	.align	4
+#endif
 	.weak	_nds32_vector_72
 	.type	_nds32_vector_72, @function
 _nds32_vector_72:
diff --git a/libgcc/config/nds32/linux-atomic.c b/libgcc/config/nds32/linux-atomic.c
new file mode 100644
index 00000000000..6da7be9a653
--- /dev/null
+++ b/libgcc/config/nds32/linux-atomic.c
@@ -0,0 +1,282 @@
+/* Linux-specific atomic operations for NDS32 Linux.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* We implement byte, short and int versions of each atomic operation
+   using the kernel helper defined below.  There is no support for
+   64-bit operations yet.  */
+
+/* This function is copied from the NDS32 Linux kernel.  */
+static inline int
+__kernel_cmpxchg (int oldval, int newval, int *mem)
+{
+  int temp1, temp2, temp3, offset;
+
+  asm volatile ("msync\tall\n"
+		"movi\t%0, #0\n"
+		"1:\n"
+		"\tllw\t%1, [%4+%0]\n"
+		"\tsub\t%3, %1, %6\n"
+		"\tcmovz\t%2, %5, %3\n"
+		"\tcmovn\t%2, %1, %3\n"
+		"\tscw\t%2, [%4+%0]\n"
+		"\tbeqz\t%2, 1b\n"
+		: "=&r" (offset), "=&r" (temp3), "=&r" (temp2), "=&r" (temp1)
+		: "r" (mem), "r" (newval), "r" (oldval) : "memory");
+
+  return temp1;
+}
+
+#define HIDDEN __attribute__ ((visibility ("hidden")))
+
+#ifdef __NDS32_EL__
+#define INVERT_MASK_1 0
+#define INVERT_MASK_2 0
+#else
+#define INVERT_MASK_1 24
+#define INVERT_MASK_2 16
+#endif
+
+#define MASK_1 0xffu
+#define MASK_2 0xffffu
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)				\
+  int HIDDEN								\
+  __sync_fetch_and_##OP##_4 (int *ptr, int val)				\
+  {									\
+    int failure, tmp;							\
+									\
+    do {								\
+      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);			\
+      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+    } while (failure != 0);						\
+									\
+    return tmp;								\
+  }
+
+FETCH_AND_OP_WORD (add,   , +)
+FETCH_AND_OP_WORD (sub,   , -)
+FETCH_AND_OP_WORD (or,    , |)
+FETCH_AND_OP_WORD (and,   , &)
+FETCH_AND_OP_WORD (xor,   , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+   subword-sized quantities.  */
+
+#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN)	\
+  TYPE HIDDEN								\
+  NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val)			\
+  {									\
+    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
+    unsigned int mask, shift, oldval, newval;				\
+    int failure;							\
+									\
+    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
+    mask = MASK_##WIDTH << shift;					\
+									\
+    do {								\
+      oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST);		\
+      newval = ((PFX_OP (((oldval & mask) >> shift)			\
+			 INF_OP (unsigned int) val)) << shift) & mask;	\
+      newval |= oldval & ~mask;						\
+      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
+    } while (failure != 0);						\
+									\
+    return (RETURN & mask) >> shift;					\
+  }
+
+
+SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
+
+#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)				\
+  int HIDDEN								\
+  __sync_##OP##_and_fetch_4 (int *ptr, int val)				\
+  {									\
+    int tmp, failure;							\
+									\
+    do {								\
+      tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);			\
+      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);	\
+    } while (failure != 0);						\
+									\
+    return PFX_OP (tmp INF_OP val);					\
+  }
+
+OP_AND_FETCH_WORD (add,   , +)
+OP_AND_FETCH_WORD (sub,   , -)
+OP_AND_FETCH_WORD (or,    , |)
+OP_AND_FETCH_WORD (and,   , &)
+OP_AND_FETCH_WORD (xor,   , ^)
+OP_AND_FETCH_WORD (nand, ~, &)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+
+int HIDDEN
+__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+  int actual_oldval, fail;
+
+  while (1)
+    {
+      actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+
+      if (oldval != actual_oldval)
+	return actual_oldval;
+
+      fail = __kernel_cmpxchg (actual_oldval, newval, ptr);
+
+      if (!fail)
+	return oldval;
+    }
+}
+
+#define SUBWORD_VAL_CAS(TYPE, WIDTH)					\
+  TYPE HIDDEN								\
+  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
+				       TYPE newval)			\
+  {									\
+    int *wordptr = (int *)((unsigned long) ptr & ~3), fail;		\
+    unsigned int mask, shift, actual_oldval, actual_newval;		\
+									\
+    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
+    mask = MASK_##WIDTH << shift;					\
+									\
+    while (1)								\
+      {									\
+	actual_oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); 	\
+									\
+	if (((actual_oldval & mask) >> shift) != (unsigned int) oldval)	\
+	  return (actual_oldval & mask) >> shift;			\
+									\
+	actual_newval = (actual_oldval & ~mask)				\
+			| (((unsigned int) newval << shift) & mask);	\
+									\
+	fail = __kernel_cmpxchg (actual_oldval, actual_newval,		\
+				 wordptr);				\
+									\
+	if (!fail)							\
+	  return oldval;						\
+      }									\
+  }
+
+SUBWORD_VAL_CAS (unsigned short, 2)
+SUBWORD_VAL_CAS (unsigned char,  1)
+
+typedef unsigned char bool;
+
+bool HIDDEN
+__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+  int failure = __kernel_cmpxchg (oldval, newval, ptr);
+  return (failure == 0);
+}
+
+#define SUBWORD_BOOL_CAS(TYPE, WIDTH)					\
+  bool HIDDEN								\
+  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,		\
+					TYPE newval)			\
+  {									\
+    TYPE actual_oldval							\
+      = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval);	\
+    return (oldval == actual_oldval);					\
+  }
+
+SUBWORD_BOOL_CAS (unsigned short, 2)
+SUBWORD_BOOL_CAS (unsigned char,  1)
+
+int HIDDEN
+__sync_lock_test_and_set_4 (int *ptr, int val)
+{
+  int failure, oldval;
+
+  do {
+    oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST);
+    failure = __kernel_cmpxchg (oldval, val, ptr);
+  } while (failure != 0);
+
+  return oldval;
+}
+
+#define SUBWORD_TEST_AND_SET(TYPE, WIDTH)				\
+  TYPE HIDDEN								\
+  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)		\
+  {									\
+    int failure;							\
+    unsigned int oldval, newval, shift, mask;				\
+    int *wordptr = (int *) ((unsigned long) ptr & ~3);			\
+									\
+    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;	\
+    mask = MASK_##WIDTH << shift;					\
+									\
+    do {								\
+      oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST);		\
+      newval = (oldval & ~mask)						\
+	       | (((unsigned int) val << shift) & mask);		\
+      failure = __kernel_cmpxchg (oldval, newval, wordptr);		\
+    } while (failure != 0);						\
+									\
+    return (oldval & mask) >> shift;					\
+  }
+
+SUBWORD_TEST_AND_SET (unsigned short, 2)
+SUBWORD_TEST_AND_SET (unsigned char,  1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH)					\
+  void HIDDEN								\
+  __sync_lock_release_##WIDTH (TYPE *ptr)				\
+  {									\
+    /* All writes before this point must be seen before we release	\
+       the lock itself.  */						\
+    __builtin_nds32_msync_all ();					\
+    *ptr = 0;								\
+  }
+
+SYNC_LOCK_RELEASE (int,   4)
+SYNC_LOCK_RELEASE (short, 2)
+SYNC_LOCK_RELEASE (char,  1)
diff --git a/libgcc/config/nds32/linux-unwind.h b/libgcc/config/nds32/linux-unwind.h
new file mode 100644
index 00000000000..00f2b2cfe43
--- /dev/null
+++ b/libgcc/config/nds32/linux-unwind.h
@@ -0,0 +1,143 @@
+/* DWARF2 EH unwinding support for NDS32 Linux signal frame.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef inhibit_libc
+
+/* Do code reading to identify a signal frame, and set the frame
+   state data appropriately.  See unwind-dw2.c for the structs.
+   The corresponding bits in the Linux kernel are in
+   arch/nds32/kernel/signal.c.  */
+
+#include <signal.h>
+#include <asm/unistd.h>
+#include <sys/ucontext.h>
+
+/* Exactly the same layout as the kernel structures, unique names.  */
+
+/* arch/nds32/kernel/signal.c */
+struct _rt_sigframe {
+  siginfo_t info;
+  struct ucontext_t uc;
+};
+
+#define RT_SIGRETURN 0x8b00f044
+
+#define MD_FALLBACK_FRAME_STATE_FOR nds32_fallback_frame_state
+
+/* This function is supposed to be invoked by uw_frame_state_for()
+   when there is no unwind data available.
+
+   Generally, given the _Unwind_Context CONTEXT for a stack frame,
+   we need to look up its caller and decode information into FS.
+   However, if the exception handling happens within a signal handler,
+   the return address of signal handler is a special module, which
+   contains signal return syscall and has no FDE in the .eh_frame section.
+   We need to implement MD_FALLBACK_FRAME_STATE_FOR so that we can
+   unwind through signal frames.  */
+static _Unwind_Reason_Code
+nds32_fallback_frame_state (struct _Unwind_Context *context,
+			    _Unwind_FrameState *fs)
+{
+  u_int32_t *pc = (u_int32_t *) context->ra;
+  struct sigcontext *sc_;
+  _Unwind_Ptr new_cfa;
+
+#ifdef __NDS32_EB__
+#error "Signal handler is not supported for force unwind."
+#endif
+
+  if ((_Unwind_Ptr) pc & 3)
+    return _URC_END_OF_STACK;
+
+  /* Check if we are going through a signal handler.
+     See arch/nds32/kernel/signal.c implementation.
+     FIXME: Currently we only handle little endian (EL) case.  */
+  if (pc[0] == RT_SIGRETURN)
+    {
+      /* Use the '_rt_sigframe' address to locate the kernel's sigcontext.
+	 The sigcontext structure is in arch/nds32/include/asm/sigcontext.h.  */
+      struct _rt_sigframe *rt_;
+      rt_ = context->cfa;
+      sc_ = &rt_->uc.uc_mcontext;
+    }
+  else
+    return _URC_END_OF_STACK;
+
+  /* Update cfa from sigcontext.  */
+  new_cfa = (_Unwind_Ptr) sc_;
+  fs->regs.cfa_how = CFA_REG_OFFSET;
+  fs->regs.cfa_reg = STACK_POINTER_REGNUM;
+  fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa;
+
+#define NDS32_PUT_FS_REG(NUM, NAME) \
+  (fs->regs.reg[NUM].how = REG_SAVED_OFFSET, \
+   fs->regs.reg[NUM].loc.offset = (_Unwind_Ptr) &(sc_->NAME) - new_cfa)
+
+  /* Restore all registers value.  */
+  NDS32_PUT_FS_REG (0, nds32_r0);
+  NDS32_PUT_FS_REG (1, nds32_r1);
+  NDS32_PUT_FS_REG (2, nds32_r2);
+  NDS32_PUT_FS_REG (3, nds32_r3);
+  NDS32_PUT_FS_REG (4, nds32_r4);
+  NDS32_PUT_FS_REG (5, nds32_r5);
+  NDS32_PUT_FS_REG (6, nds32_r6);
+  NDS32_PUT_FS_REG (7, nds32_r7);
+  NDS32_PUT_FS_REG (8, nds32_r8);
+  NDS32_PUT_FS_REG (9, nds32_r9);
+  NDS32_PUT_FS_REG (10, nds32_r10);
+  NDS32_PUT_FS_REG (11, nds32_r11);
+  NDS32_PUT_FS_REG (12, nds32_r12);
+  NDS32_PUT_FS_REG (13, nds32_r13);
+  NDS32_PUT_FS_REG (14, nds32_r14);
+  NDS32_PUT_FS_REG (15, nds32_r15);
+  NDS32_PUT_FS_REG (16, nds32_r16);
+  NDS32_PUT_FS_REG (17, nds32_r17);
+  NDS32_PUT_FS_REG (18, nds32_r18);
+  NDS32_PUT_FS_REG (19, nds32_r19);
+  NDS32_PUT_FS_REG (20, nds32_r20);
+  NDS32_PUT_FS_REG (21, nds32_r21);
+  NDS32_PUT_FS_REG (22, nds32_r22);
+  NDS32_PUT_FS_REG (23, nds32_r23);
+  NDS32_PUT_FS_REG (24, nds32_r24);
+  NDS32_PUT_FS_REG (25, nds32_r25);
+
+  NDS32_PUT_FS_REG (28, nds32_fp);
+  NDS32_PUT_FS_REG (29, nds32_gp);
+  NDS32_PUT_FS_REG (30, nds32_lp);
+  NDS32_PUT_FS_REG (31, nds32_sp);
+
+  /* Restore PC, which points to the instruction that triggered the signal.  */
+  NDS32_PUT_FS_REG (32, nds32_ipc);
+
+#undef NDS32_PUT_FS_REG
+
+  /* The retaddr is PC, use PC to find FDE.  */
+  fs->retaddr_column = 32;
+  fs->signal_frame = 1;
+
+  return _URC_NO_REASON;
+}
+
+#endif
diff --git a/libgcc/config/nds32/sfp-machine.h b/libgcc/config/nds32/sfp-machine.h
index 499bdad7423..bfbdaf9c3bf 100644
--- a/libgcc/config/nds32/sfp-machine.h
+++ b/libgcc/config/nds32/sfp-machine.h
@@ -76,6 +76,25 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
     R##_c = FP_CLS_NAN;						\
   } while (0)
 
+#ifdef NDS32_ABI_2FP_PLUS
+#define FP_RND_NEAREST		0x0
+#define FP_RND_PINF		0x1
+#define FP_RND_MINF		0x2
+#define FP_RND_ZERO		0x3
+#define FP_RND_MASK		0x3
+
+#define _FP_DECL_EX \
+  unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST
+
+#define FP_INIT_ROUNDMODE			\
+  do {						\
+    _fcsr = __builtin_nds32_fmfcsr ();		\
+  } while (0)
+
+#define FP_ROUNDMODE (_fcsr & FP_RND_MASK)
+
+#endif
+
 /* Not checked.  */
 #define _FP_TININESS_AFTER_ROUNDING 0
 
diff --git a/libgcc/config/nds32/t-nds32-glibc b/libgcc/config/nds32/t-nds32-glibc
new file mode 100644
index 00000000000..4e229314c34
--- /dev/null
+++ b/libgcc/config/nds32/t-nds32-glibc
@@ -0,0 +1,34 @@
+# Rules of glibc library makefile of Andes NDS32 cpu for GNU compiler
+# Copyright (C) 2012-2015 Free Software Foundation, Inc.
+# Contributed by Andes Technology Corporation.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compiler flags to use when compiling 'libgcc2.c'
+HOST_LIBGCC2_CFLAGS = -O2 -fPIC -fwrapv
+LIB2ADD += $(srcdir)/config/nds32/linux-atomic.c
+
+#LIB1ASMSRC   = nds32/lib1asmsrc-newlib.S
+#LIB1ASMFUNCS = _divsi3 _modsi3 _udivsi3 _umodsi3
+
+# List of functions not to build from libgcc2.c.
+#LIB2FUNCS_EXCLUDE = _clzsi2
+
+# List of extra C and assembler files(*.S) to add to static libgcc2.
+#LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-newlib/_clzsi2.c
+
+# ------------------------------------------------------------------------
diff --git a/libgcc/config/nds32/t-nds32-isr b/libgcc/config/nds32/t-nds32-isr
index 4f86f900395..abfd82b2248 100644
--- a/libgcc/config/nds32/t-nds32-isr
+++ b/libgcc/config/nds32/t-nds32-isr
@@ -23,11 +23,11 @@
 # Makfile fragment rules for libnds32_isr.a to support ISR attribute extension
 ###############################################################################
 
-# basic flags setting
+# Basic flags setting.
 ISR_CFLAGS = $(CFLAGS) -c
 
-# the object files we would like to create
-LIBNDS32_ISR_16B_OBJS = \
+# The object files we would like to create.
+LIBNDS32_ISR_VEC_OBJS = \
 		vec_vid00.o vec_vid01.o vec_vid02.o vec_vid03.o \
 		vec_vid04.o vec_vid05.o vec_vid06.o vec_vid07.o \
 		vec_vid08.o vec_vid09.o vec_vid10.o vec_vid11.o \
@@ -46,40 +46,9 @@ LIBNDS32_ISR_16B_OBJS = \
 		vec_vid60.o vec_vid61.o vec_vid62.o vec_vid63.o \
 		vec_vid64.o vec_vid65.o vec_vid66.o vec_vid67.o \
 		vec_vid68.o vec_vid69.o vec_vid70.o vec_vid71.o \
-		vec_vid72.o \
-		excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \
-		excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \
-		intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \
-		intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \
-		reset.o
-
-LIBNDS32_ISR_4B_OBJS = \
-		vec_vid00_4b.o vec_vid01_4b.o vec_vid02_4b.o vec_vid03_4b.o \
-		vec_vid04_4b.o vec_vid05_4b.o vec_vid06_4b.o vec_vid07_4b.o \
-		vec_vid08_4b.o vec_vid09_4b.o vec_vid10_4b.o vec_vid11_4b.o \
-		vec_vid12_4b.o vec_vid13_4b.o vec_vid14_4b.o vec_vid15_4b.o \
-		vec_vid16_4b.o vec_vid17_4b.o vec_vid18_4b.o vec_vid19_4b.o \
-		vec_vid20_4b.o vec_vid21_4b.o vec_vid22_4b.o vec_vid23_4b.o \
-		vec_vid24_4b.o vec_vid25_4b.o vec_vid26_4b.o vec_vid27_4b.o \
-		vec_vid28_4b.o vec_vid29_4b.o vec_vid30_4b.o vec_vid31_4b.o \
-		vec_vid32_4b.o vec_vid33_4b.o vec_vid34_4b.o vec_vid35_4b.o \
-		vec_vid36_4b.o vec_vid37_4b.o vec_vid38_4b.o vec_vid39_4b.o \
-		vec_vid40_4b.o vec_vid41_4b.o vec_vid42_4b.o vec_vid43_4b.o \
-		vec_vid44_4b.o vec_vid45_4b.o vec_vid46_4b.o vec_vid47_4b.o \
-		vec_vid48_4b.o vec_vid49_4b.o vec_vid50_4b.o vec_vid51_4b.o \
-		vec_vid52_4b.o vec_vid53_4b.o vec_vid54_4b.o vec_vid55_4b.o \
-		vec_vid56_4b.o vec_vid57_4b.o vec_vid58_4b.o vec_vid59_4b.o \
-		vec_vid60_4b.o vec_vid61_4b.o vec_vid62_4b.o vec_vid63_4b.o \
-		vec_vid64_4b.o vec_vid65_4b.o vec_vid66_4b.o vec_vid67_4b.o \
-		vec_vid68_4b.o vec_vid69_4b.o vec_vid70_4b.o vec_vid71_4b.o \
-		vec_vid72_4b.o \
-		excp_isr_ps_nn_4b.o excp_isr_ps_ns_4b.o excp_isr_ps_nr_4b.o \
-		excp_isr_sa_nn_4b.o excp_isr_sa_ns_4b.o excp_isr_sa_nr_4b.o \
-		intr_isr_ps_nn_4b.o intr_isr_ps_ns_4b.o intr_isr_ps_nr_4b.o \
-		intr_isr_sa_nn_4b.o intr_isr_sa_ns_4b.o intr_isr_sa_nr_4b.o \
-		reset_4b.o
+		vec_vid72.o
 
-LIBNDS32_ISR_COMMON_OBJS = \
+LIBNDS32_ISR_JMP_OBJS = \
 		jmptbl_vid00.o jmptbl_vid01.o jmptbl_vid02.o jmptbl_vid03.o \
 		jmptbl_vid04.o jmptbl_vid05.o jmptbl_vid06.o jmptbl_vid07.o \
 		jmptbl_vid08.o jmptbl_vid09.o jmptbl_vid10.o jmptbl_vid11.o \
@@ -98,29 +67,32 @@ LIBNDS32_ISR_COMMON_OBJS = \
 		jmptbl_vid60.o jmptbl_vid61.o jmptbl_vid62.o jmptbl_vid63.o \
 		jmptbl_vid64.o jmptbl_vid65.o jmptbl_vid66.o jmptbl_vid67.o \
 		jmptbl_vid68.o jmptbl_vid69.o jmptbl_vid70.o jmptbl_vid71.o \
-		jmptbl_vid72.o \
+		jmptbl_vid72.o
+
+LIBNDS32_ISR_COMMON_OBJS = \
+		excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \
+		excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \
+		intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \
+		intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \
+		reset.o \
 		nmih.o \
 		wrh.o
 
-LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_16B_OBJS) $(LIBNDS32_ISR_4B_OBJS) $(LIBNDS32_ISR_COMMON_OBJS)
+LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_VEC_OBJS) $(LIBNDS32_ISR_JMP_OBJS) $(LIBNDS32_ISR_COMMON_OBJS)
 
 
-# Build common objects for ISR library
-nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o
 
-wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o
-
-jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S
+# Build vector vid objects for ISR library.
+vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S
 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@
 
 
-
-# Build 16b version objects for ISR library. (no "_4b" postfix string)
-vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S
+# Build jump table objects for ISR library.
+jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S
 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@
 
+
+# Build common objects for ISR library.
 excp_isr_ps_nn.o: $(srcdir)/config/nds32/isr-library/excp_isr.S
 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr.S -o excp_isr_ps_nn.o
 
@@ -160,48 +132,12 @@ intr_isr_sa_nr.o: $(srcdir)/config/nds32/isr-library/intr_isr.S
 reset.o: $(srcdir)/config/nds32/isr-library/reset.S
 	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset.S -o reset.o
 
-# Build 4b version objects for ISR library.
-vec_vid%_4b.o: $(srcdir)/config/nds32/isr-library/vec_vid%_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@
-
-excp_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nn_4b.o
-
-excp_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_ns_4b.o
-
-excp_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nr_4b.o
-
-excp_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nn_4b.o
-
-excp_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_ns_4b.o
-
-excp_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nr_4b.o
-
-intr_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nn_4b.o
-
-intr_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_ns_4b.o
-
-intr_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nr_4b.o
-
-intr_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nn_4b.o
-
-intr_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_ns_4b.o
+nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S
+	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o
 
-intr_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nr_4b.o
+wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S
+	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o
 
-reset_4b.o: $(srcdir)/config/nds32/isr-library/reset_4b.S
-	$(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset_4b.S -o reset_4b.o
 
 
 # The rule to create libnds32_isr.a file
diff --git a/libgcc/config/nds32/t-nds32-newlib b/libgcc/config/nds32/t-nds32-newlib
index 1ea2bc32163..a59646fcff5 100644
--- a/libgcc/config/nds32/t-nds32-newlib
+++ b/libgcc/config/nds32/t-nds32-newlib
@@ -19,7 +19,7 @@
 # <http://www.gnu.org/licenses/>.
 
 # Compiler flags to use when compiling 'libgcc2.c'
-HOST_LIBGCC2_CFLAGS = -O2
+HOST_LIBGCC2_CFLAGS = -O2 -fwrapv
 
 
 #LIB1ASMSRC   = nds32/lib1asmsrc-newlib.S