#include <stdio.h>
#include <stdlib.h>
#include <arm_neon.h>
int main()
{
int x[10]={0};
int *x_value;
x_value=x;
int data;
asm(
" str fp, [sp, #-4]! \n\t"
" add fp, sp, #0 \n\t"
/*1-8*/
" mov r3, #1 \n\t"
" strb r3, [fp, #-48] \n\t"
" mov r3,#2 \n\t"
" strb r3, [fp, #-47] \n\t"
" mov r3, #3 \n\t"
" strb r3, [fp, #-46] \n\t"
" mov r3,#4 \n\t"
" strb r3,[fp,#-45] \n\t"
" mov r3, #5 \n\t"
" strb r3, [fp, #-44] \n\t"
" mov r3,#6 \n\t"
" strb r3,[fp,#-43] \n\t"
" mov r3,#7 \n\t"
" strb r3,[fp,#-42] \n\t"
" mov r3,#8 \n\t"
" strb r3,[fp,#-41] \n\t"
/*9-16*/
" mov r3, #9 \n\t"
" strb r3, [fp, #-40] \n\t"
" mov r3,#10 \n\t"
" strb r3, [fp, #-39] \n\t"
" mov r3, #11 \n\t"
" strb r3, [fp, #-38] \n\t"
" mov r3,#12 \n\t"
" strb r3,[fp,#-37] \n\t"
" mov r3, #13 \n\t"
" strb r3, [fp, #-36] \n\t"
" mov r3,#14 \n\t"
" strb r3,[fp,#-35] \n\t"
" mov r3,#14 \n\t"
" strb r3,[fp,#-34] \n\t"
" mov r3,#16 \n\t"
" strb r3,[fp,#-33] \n\t"
" sub r3, fp,#48 \n\t"//back to [fp,#-48]
" vld1.8 {d16},[r3] \n\t"//load to neon register,
" sub r3,fp, #40 \n\t"//back to [fp,#32]
" vld1.8 {d17},[r3] \n\t" //load to neon register,
" vadd.i8 d0,d16,d17 \n\t"
" sub r3, fp,#200 \n\t"
" vst1.8 {d0}, [r3] \n\t"
" ldrb r3,[fp,#-200] \n\t"
" str r3,[%[x],#0] \n\t"
" ldrb r3,[fp,#-199] \n\t"
" str r3,[%[x],#4] \n\t"
" ldrb r3,[fp,#-198] \n\t"
" str r3,[%[x],#8] \n\t"
" ldrb r3,[fp,#-197] \n\t"
" str r3,[%[x],#12] \n\t"
" ldrb r3,[fp,#-196] \n\t"
" str r3,[%[x],#16] \n\t"
" ldrb r3,[fp,#-195] \n\t"
" str r3,[%[x],#20] \n\t"
" ldrb r3,[fp,#-194] \n\t"
" str r3,[%[x],#24] \n\t"
" ldrb r3,[fp,#-193] \n\t"
" str r3,[%[x],#28] \n\t"
" mov %[data],r3 \n\t"
" ldr fp, [sp], #4 \n\t"
:[x]"+r"(x_value),[data]"+r"(data)
:
:"r0","r3","memory"
);
int i;
for(i=0;i<10;i++)
printf("%d:%d-%d\n",i,x[i],data);
return 0;
}
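/*
 * Note: a small example of ARM inline assembly embedded in C, with NEON arithmetic.
 * (Source page footer: latest recommended article published 2024-06-12 21:23:37.)
 */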