1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
/*
 * Store the constant c into the first n elements of x.
 *
 * x: destination array; must hold at least n floats.
 * c: value written to every element.
 * n: number of elements to write; nothing is written when n <= 0.
 *
 * The main loop is manually unrolled by four; a scalar loop then handles the
 * remaining 0-3 elements. The index is a ptrdiff_t so that it has the same
 * range as n (long is narrower than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        x[i] = c;
        x[i + 1] = c;
        x[i + 2] = c;
        x[i + 3] = c;
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        x[i] = c;
    }
}
/*
 * Element-wise product: z[i] = x[i] * y[i] for i in [0, n).
 *
 * z: destination array; must hold at least n floats.
 * x: first factor array, at least n floats.
 * y: second factor array, at least n floats.
 * n: number of elements to process; nothing is written when n <= 0.
 *
 * Elements are processed in ascending index order. The main loop is manually
 * unrolled by four; a scalar loop handles the remaining 0-3 elements. The
 * index is a ptrdiff_t so that it has the same range as n (long is narrower
 * than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        z[i] = x[i] * y[i];
        z[i + 1] = x[i + 1] * y[i + 1];
        z[i + 2] = x[i + 2] * y[i + 2];
        z[i + 3] = x[i + 3] * y[i + 3];
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        z[i] = x[i] * y[i];
    }
}
/*
 * Scale by a constant: y[i] = x[i] * c for i in [0, n).
 *
 * y: destination array; must hold at least n floats.
 * x: source array, at least n floats.
 * c: scalar multiplier.
 * n: number of elements to process; nothing is written when n <= 0.
 *
 * Elements are processed in ascending index order. The main loop is manually
 * unrolled by four; a scalar loop handles the remaining 0-3 elements. The
 * index is a ptrdiff_t so that it has the same range as n (long is narrower
 * than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        y[i] = x[i] * c;
        y[i + 1] = x[i + 1] * c;
        y[i + 2] = x[i + 2] * c;
        y[i + 3] = x[i + 3] * c;
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        y[i] = x[i] * c;
    }
}
/*
 * Scaled element-wise addition: z[i] = x[i] + c * y[i] for i in [0, n).
 *
 * z: destination array; must hold at least n floats.
 * x: array of addends, at least n floats.
 * y: array that is scaled by c before being added, at least n floats.
 * c: scalar applied to every element of y.
 * n: number of elements to process; nothing is written when n <= 0.
 *
 * Elements are processed in ascending index order. The main loop is manually
 * unrolled by four; a scalar loop handles the remaining 0-3 elements. The
 * index is a ptrdiff_t so that it has the same range as n (long is narrower
 * than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        z[i] = x[i] + c * y[i];
        z[i + 1] = x[i + 1] + c * y[i + 1];
        z[i + 2] = x[i + 2] + c * y[i + 2];
        z[i + 3] = x[i + 3] + c * y[i + 3];
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        z[i] = x[i] + c * y[i];
    }
}
/*
 * Add a constant: y[i] = x[i] + c for i in [0, n).
 *
 * y: destination array; must hold at least n floats.
 * x: source array, at least n floats.
 * c: scalar added to every element.
 * n: number of elements to process; nothing is written when n <= 0.
 *
 * Elements are processed in ascending index order. The main loop is manually
 * unrolled by four; a scalar loop handles the remaining 0-3 elements. The
 * index is a ptrdiff_t so that it has the same range as n (long is narrower
 * than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        y[i] = x[i] + c;
        y[i + 1] = x[i + 1] + c;
        y[i + 2] = x[i + 2] + c;
        y[i + 3] = x[i + 3] + c;
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        y[i] = x[i] + c;
    }
}
/*
 * Element-wise quotient: z[i] = x[i] / y[i] for i in [0, n).
 *
 * z: destination array; must hold at least n floats.
 * x: array of dividends, at least n floats.
 * y: array of divisors, at least n floats. No check for zero divisors is
 *    made; the result is whatever float division yields.
 * n: number of elements to process; nothing is written when n <= 0.
 *
 * Elements are processed in ascending index order. The main loop is manually
 * unrolled by four; a scalar loop handles the remaining 0-3 elements. The
 * index is a ptrdiff_t so that it has the same range as n (long is narrower
 * than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        z[i] = x[i] / y[i];
        z[i + 1] = x[i + 1] / y[i + 1];
        z[i + 2] = x[i + 2] / y[i + 2];
        z[i + 3] = x[i + 3] / y[i + 3];
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        z[i] = x[i] / y[i];
    }
}
/*
 * Divide by a constant: y[i] = x[i] / c for i in [0, n).
 *
 * y: destination array; must hold at least n floats.
 * x: source array, at least n floats.
 * c: scalar divisor. No check for zero is made; the result is whatever
 *    float division yields.
 * n: number of elements to process; nothing is written when n <= 0.
 *
 * Elements are processed in ascending index order. The main loop is manually
 * unrolled by four; a scalar loop handles the remaining 0-3 elements. The
 * index is a ptrdiff_t so that it has the same range as n (long is narrower
 * than ptrdiff_t on LLP64 targets).
 */
static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
    ptrdiff_t i = 0;

    /* n - i >= 4: a complete group of four elements is still available. */
    for (; n - i >= 4; i += 4) {
        y[i] = x[i] / c;
        y[i + 1] = x[i + 1] / c;
        y[i + 2] = x[i + 2] / c;
        y[i + 3] = x[i + 3] / c;
    }
    /* Leftover elements that do not fill a group of four. */
    for (; i < n; i++) {
        y[i] = x[i] / c;
    }
}
|