summaryrefslogtreecommitdiffstats
path: root/contrib/lua-torch/torch7/lib/TH/vector/NEON.c
blob: 7920fb13b1429f0ba5e25da0665f004e756deb10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
 * Store the scalar c into each of the first n elements of x.
 *
 *   x  destination array; must hold at least n floats
 *   c  value to store
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    x[i] = c;
    x[i+1] = c;
    x[i+2] = c;
    x[i+3] = c;
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    x[i] = c;
}

/*
 * Element-wise product: z[i] = x[i] * y[i] for 0 <= i < n.
 *
 *   z  destination array; must hold at least n floats
 *   x  first factor array, at least n floats
 *   y  second factor array, at least n floats
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    z[i] = x[i] * y[i];
    z[i+1] = x[i+1] * y[i+1];
    z[i+2] = x[i+2] * y[i+2];
    z[i+3] = x[i+3] * y[i+3];
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    z[i] = x[i] * y[i];
}

/*
 * Scale by a constant: y[i] = x[i] * c for 0 <= i < n.
 *
 *   y  destination array; must hold at least n floats
 *   x  source array, at least n floats
 *   c  scalar multiplier
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    y[i] = x[i] * c;
    y[i+1] = x[i+1] * c;
    y[i+2] = x[i+2] * c;
    y[i+3] = x[i+3] * c;
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    y[i] = x[i] * c;
}

/*
 * Scaled element-wise add: z[i] = x[i] + c * y[i] for 0 <= i < n.
 *
 *   z  destination array; must hold at least n floats
 *   x  addend array, at least n floats
 *   y  array that is multiplied by c before being added, at least n floats
 *   c  scalar applied to every element of y
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    z[i] = x[i] + c * y[i];
    z[i+1] = x[i+1] + c * y[i+1];
    z[i+2] = x[i+2] + c * y[i+2];
    z[i+3] = x[i+3] + c * y[i+3];
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    z[i] = x[i] + c * y[i];
}

/*
 * Add a constant: y[i] = x[i] + c for 0 <= i < n.
 *
 *   y  destination array; must hold at least n floats
 *   x  source array, at least n floats
 *   c  scalar added to every element
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    y[i] = x[i] + c;
    y[i+1] = x[i+1] + c;
    y[i+2] = x[i+2] + c;
    y[i+3] = x[i+3] + c;
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    y[i] = x[i] + c;
}

/*
 * Element-wise quotient: z[i] = x[i] / y[i] for 0 <= i < n.
 * No check is made for zero divisors in y.
 *
 *   z  destination array; must hold at least n floats
 *   x  dividend array, at least n floats
 *   y  divisor array, at least n floats
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    z[i] = x[i] / y[i];
    z[i+1] = x[i+1] / y[i+1];
    z[i+2] = x[i+2] / y[i+2];
    z[i+3] = x[i+3] / y[i+3];
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    z[i] = x[i] / y[i];
}

/*
 * Divide by a constant: y[i] = x[i] / c for 0 <= i < n.
 * No check is made for c being zero.
 *
 *   y  destination array; must hold at least n floats
 *   x  dividend array, at least n floats
 *   c  scalar divisor
 *   n  element count; nothing is written when n <= 0
 */
static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
  /* Same type as n: a long index can be narrower than ptrdiff_t (LLP64). */
  ptrdiff_t i = 0;

  /* Main loop, unrolled by four.  Testing n - i >= 4 lets an exact multiple
     of four be consumed here instead of falling through to the tail. */
  for(; n - i >= 4; i += 4)
  {
    y[i] = x[i] / c;
    y[i+1] = x[i+1] / c;
    y[i+2] = x[i+2] / c;
    y[i+3] = x[i+3] / c;
  }

  /* Tail: the remaining 0..3 elements. */
  for(; i < n; i++)
    y[i] = x[i] / c;
}