summary refs log tree commit diff stats
path: root/contrib/lua-torch/torch7/lib
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-07-01 15:13:04 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2019-07-01 15:13:04 +0100
commit 891b250b452f8e1963a99931f241ac75e34d0281 (patch)
tree ab56b822aca3cc6d02a3c9afbe8ca2f6d1c0381f /contrib/lua-torch/torch7/lib
parent 38691d998d019ac0fba95720c337e3f9badf55c4 (diff)
download rspamd-891b250b452f8e1963a99931f241ac75e34d0281.tar.gz
rspamd-891b250b452f8e1963a99931f241ac75e34d0281.zip
[Project] Remove torch
Diffstat (limited to 'contrib/lua-torch/torch7/lib')
-rw-r--r--contrib/lua-torch/torch7/lib/CMakeLists.txt7
-rw-r--r--contrib/lua-torch/torch7/lib/TH/CMakeLists.txt296
-rw-r--r--contrib/lua-torch/torch7/lib/TH/README.md11
-rw-r--r--contrib/lua-torch/torch7/lib/TH/TH.h25
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THAllocator.c500
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THAllocator.h43
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THAtomic.c267
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THAtomic.h125
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THBlas.c4
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THBlas.h11
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in9
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THDiskFile.c797
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THDiskFile.h19
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THFile.c157
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THFile.h91
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THFilePrivate.h50
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGeneral.c406
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGeneral.h.in130
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h17
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h17
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h25
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h20
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h24
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THHalf.c100
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THHalf.h41
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THLapack.c4
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THLapack.h27
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THLogAdd.c88
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THLogAdd.h14
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THMath.h36
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THMemoryFile.c685
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THMemoryFile.h13
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THRandom.c272
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THRandom.h81
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THSize.c26
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THSize.h13
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THStorage.c153
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THStorage.h39
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THTensor.c34
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THTensor.h42
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THTensorApply.h238
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h324
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THTensorMacros.h30
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THVector.c30
-rw-r--r--contrib/lua-torch/torch7/lib/TH/THVector.h13
-rw-r--r--contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake76
-rw-r--r--contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake309
-rw-r--r--contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake190
-rw-r--r--contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake272
-rw-r--r--contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake125
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THBlas.c412
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THBlas.h19
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THLapack.c270
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THLapack.h40
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THStorage.c226
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THStorage.h71
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c75
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h18
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensor.c939
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensor.h138
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c1957
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h79
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c136
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h17
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c1121
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h25
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c3275
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h198
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c250
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h25
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THVector.h17
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c131
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c262
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h395
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c127
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h1
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c212
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c320
-rw-r--r--contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h165
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/AVX.c274
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/AVX.h23
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/AVX2.c47
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/AVX2.h9
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/NEON.c105
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/SSE.c268
-rw-r--r--contrib/lua-torch/torch7/lib/TH/vector/VSX.c2520
-rw-r--r--contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt12
-rw-r--r--contrib/lua-torch/torch7/lib/luaT/README.md266
-rw-r--r--contrib/lua-torch/torch7/lib/luaT/luaT.c1373
-rw-r--r--contrib/lua-torch/torch7/lib/luaT/luaT.h135
-rw-r--r--contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in9
97 files changed, 0 insertions, 22432 deletions
diff --git a/contrib/lua-torch/torch7/lib/CMakeLists.txt b/contrib/lua-torch/torch7/lib/CMakeLists.txt
deleted file mode 100644
index d6a0e2c9c..000000000
--- a/contrib/lua-torch/torch7/lib/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-# Install sub-directories used by the TH and luaT builds; each value is
-# forwarded from a variable that is not set in this file.
-set(TH_INSTALL_BIN_SUBDIR "${BINDIR}")
-set(TH_INSTALL_LIB_SUBDIR "${RSPAMD_LIBDIR}")
-set(TH_INSTALL_INCLUDE_SUBDIR "${Torch_INSTALL_INCLUDE_SUBDIR}")
-set(TH_INSTALL_CMAKE_SUBDIR "${Torch_INSTALL_CMAKE_SUBDIR}")
-
-# Build the two libraries that live below this directory.
-add_subdirectory(TH)
-add_subdirectory(luaT)
diff --git a/contrib/lua-torch/torch7/lib/TH/CMakeLists.txt b/contrib/lua-torch/torch7/lib/TH/CMakeLists.txt
deleted file mode 100644
index f7e0bf9bb..000000000
--- a/contrib/lua-torch/torch7/lib/TH/CMakeLists.txt
+++ /dev/null
@@ -1,296 +0,0 @@
-cmake_minimum_required(VERSION 2.6)
-
-# avoid some cmake warnings
-
-# Expose the bundled Find*.cmake modules and add one extra library search
-# directory in front of the existing ones.
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
-set(CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ ${CMAKE_LIBRARY_PATH})
-
-#######################################################################
-##### flags section
-######################################################################
-
-if(MSVC)
-  # MSVC now supports C99 since VS2013/VS2015, however the standard version switch is not provided yet
-  # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /std:c99")
-  add_definitions(-D_CRT_SECURE_NO_DEPRECATE=1) # respect the standard
-else()
-  # enable gnu99 and not c99 because we use
-  # gnu extensions like posix_memalign
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
-endif()
-
-# -w is appended after the language-standard flag on every platform.
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
-
-if(UNIX)
-  # prevent Unknown CMake command "check_function_exists".
-  include(CheckFunctionExists)
-endif()
-
-# OpenMP support?
-
-if(WITH_OPENMP)
-  find_package(OpenMP)
-  if(OPENMP_FOUND)
-    message(STATUS "Compiling with OpenMP support")
-    # The OpenMP flags are appended to the C, C++ and executable-linker flags.
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
-  endif()
-endif()
-
-# ARM specific flags
-# ASIMD takes precedence over NEON; the Cortex checks are independent of it.
-find_package(ARM)
-if(ASIMD_FOUND)
-  message(STATUS "asimd/Neon found with compiler flag : -D__NEON__")
-  set(CMAKE_C_FLAGS "-D__NEON__ ${CMAKE_C_FLAGS}")
-elseif(NEON_FOUND)
-  message(STATUS "Neon found with compiler flag : -mfpu=neon -D__NEON__")
-  set(CMAKE_C_FLAGS "-mfpu=neon -D__NEON__ ${CMAKE_C_FLAGS}")
-endif()
-if(CORTEXA8_FOUND)
-  message(STATUS "Cortex-A8 Found with compiler flag : -mcpu=cortex-a8")
-  set(CMAKE_C_FLAGS "-mcpu=cortex-a8 -fprefetch-loop-arrays ${CMAKE_C_FLAGS}")
-endif()
-if(CORTEXA9_FOUND)
-  message(STATUS "Cortex-A9 Found with compiler flag : -mcpu=cortex-a9")
-  set(CMAKE_C_FLAGS "-mcpu=cortex-a9 ${CMAKE_C_FLAGS}")
-endif()
-
-include(CheckIncludeFile)
-include(CheckCSourceCompiles)
-check_include_file(cpuid.h HAVE_CPUID_H)
-# Check for a cpuid intrinsic
-if(HAVE_CPUID_H)
-  check_c_source_compiles("#include <cpuid.h>
- int main()
- {
- unsigned int eax, ebx, ecx, edx;
- return __get_cpuid(0, &eax, &ebx, &ecx, &edx);
- }" HAVE_GCC_GET_CPUID)
-endif()
-if(HAVE_GCC_GET_CPUID)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID")
-endif()
-
-# Probe whether inline asm may name ebx as an output of cpuid; when the probe
-# does not compile, USE_GCC_GET_CPUID is defined below.
-check_c_source_compiles("#include <stdint.h>
- static inline void cpuid(uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
- {
- uint32_t a = *eax, b, c = *ecx, d;
- asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) );
- *eax = a; *ebx = b; *ecx = c; *edx = d;
- }
- int main() {
- uint32_t a,b,c,d;
- cpuid(&a, &b, &c, &d);
- return 0;
- }" NO_GCC_EBX_FPIC_BUG)
-
-if(NOT NO_GCC_EBX_FPIC_BUG)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID")
-endif()
-
-
-
-find_package(SSE) # checks SSE, AVX and AVX2
-if(C_SSE2_FOUND)
-  message(STATUS "SSE2 Found")
-  set(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}")
-endif()
-if(C_SSE3_FOUND)
-  message(STATUS "SSE3 Found")
-  set(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}")
-endif()
-# we dont set -mavx and -mavx2 flags globally, but only for specific files
-# however, we want to enable the AVX codepaths, so we still need to
-# add USE_AVX and USE_AVX2 macro defines
-# The whole AVX/AVX2 group is switched off by the constant condition below.
-if(FALSE)
-  if(C_AVX_FOUND)
-    message(STATUS "AVX Found")
-    set(CMAKE_C_FLAGS "-DUSE_AVX ${CMAKE_C_FLAGS}")
-  endif()
-  if(C_AVX2_FOUND)
-    message(STATUS "AVX2 Found")
-    set(CMAKE_C_FLAGS "-DUSE_AVX2 ${CMAKE_C_FLAGS}")
-  endif()
-endif()
-
-# CHECK_C_SOURCE_RUNS is provided by this module; only CheckCSourceCompiles
-# is included earlier in this file.
-INCLUDE(CheckCSourceRuns)
-
-# Probe the available atomic primitives.  C11 <stdatomic.h> is tried first;
-# the MSVC and GCC intrinsics are only probed when it is unavailable.
-CHECK_C_SOURCE_RUNS("
-#include <stdatomic.h>
-int main()
-{
- int a;
- int oa;
- atomic_store(&a, 1);
- atomic_fetch_add(&a, 1);
- oa = atomic_load(&a);
- if(!atomic_compare_exchange_strong(&a, &oa, 3))
- return -1;
- return 0;
-}
-" HAS_C11_ATOMICS)
-
-IF(NOT HAS_C11_ATOMICS)
- CHECK_C_SOURCE_RUNS("
-#include <intrin.h>
-int main()
-{
- long a;
- _InterlockedExchange(&a, 1);
- _InterlockedExchangeAdd(&a, 1);
- if(_InterlockedCompareExchange(&a, 3, 2) != 2)
- return -1;
- return 0;
-}
-" HAS_MSC_ATOMICS)
-
- CHECK_C_SOURCE_RUNS("
-int main()
-{
- int a;
- __sync_lock_test_and_set(&a, 1);
- __sync_fetch_and_add(&a, 1);
- if(!__sync_bool_compare_and_swap(&a, 2, 3))
- return -1;
- return 0;
-}
-" HAS_GCC_ATOMICS)
-ENDIF()
-
-#######################################################################
-##### sources section
-######################################################################
-
-# IF ANY SIMD FOUND
-# On x86_64 the SSE convolution sources are always built and USE_SSE2 is
-# defined.
-if("${ARCH}" STREQUAL "x86_64")
-  set(simd generic/simd/convolve.c generic/simd/convolve5x5_sse.c)
-  set(CMAKE_C_FLAGS "-DUSE_SSE2 ${CMAKE_C_FLAGS}")
-  set_source_files_properties(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math")
-endif()
-
-
-# IF AVX FOUND
-# The AVX/AVX2 sources are switched off by the constant condition below.
-if(FALSE)
-  if(C_AVX_FOUND)
-    if(MSVC)
-      set_source_files_properties(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX_FLAGS}")
-      set_source_files_properties(vector/AVX.c PROPERTIES COMPILE_FLAGS "/Ox /arch:AVX ${C_AVX_FLAGS}")
-    else()
-      set_source_files_properties(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX_FLAGS}")
-      set_source_files_properties(vector/AVX.c PROPERTIES COMPILE_FLAGS "-O3 ${C_AVX_FLAGS}")
-    endif()
-    set(simd ${simd} vector/AVX.c generic/simd/convolve5x5_avx.c)
-  endif()
-
-  if(C_AVX2_FOUND)
-    if(MSVC)
-      set_source_files_properties(vector/AVX2.c PROPERTIES COMPILE_FLAGS "/Ox /arch:AVX2 ${C_AVX2_FLAGS}")
-    else()
-      set_source_files_properties(vector/AVX2.c PROPERTIES COMPILE_FLAGS "-O3 ${C_AVX2_FLAGS}")
-    endif()
-    set(simd ${simd} vector/AVX2.c)
-  endif()
-endif()
-
-set(hdr
-  THGeneral.h THHalf.h THAllocator.h THSize.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h
-  THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h )
-
-set(src
-  THGeneral.c THHalf.c THAllocator.c THSize.c THStorage.c THTensor.c THBlas.c THLapack.c
-  THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)
-
-# Final source list: C sources, then headers, then any SIMD sources.
-set(src ${src} ${hdr} ${simd})
-
-#######################################################################
-##### build section
-######################################################################
-
-add_torch_library(TH SHARED "${src}")
-
-# Select one atomics implementation, in the order C11, MSVC, GCC, pthread.
-if(HAS_C11_ATOMICS)
-  add_definitions(-DUSE_C11_ATOMICS=1)
-  message(STATUS "Atomics: using C11 intrinsics")
-elseif(HAS_MSC_ATOMICS)
-  add_definitions(-DUSE_MSC_ATOMICS=1)
-  message(STATUS "Atomics: using MSVC intrinsics")
-elseif(HAS_GCC_ATOMICS)
-  add_definitions(-DUSE_GCC_ATOMICS=1)
-  message(STATUS "Atomics: using GCC intrinsics")
-else()
-  set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
-  find_package(Threads)
-  if(THREADS_FOUND)
-    add_definitions(-DUSE_PTHREAD_ATOMICS=1)
-    target_link_libraries(TH ${CMAKE_THREAD_LIBS_INIT})
-    message(STATUS "Atomics: using pthread")
-  endif()
-endif()
-
-# Link BLAS when found and record which flavour was detected.
-find_package(BLAS)
-if(BLAS_FOUND)
-  set(USE_BLAS 1)
-  target_link_libraries(TH ${BLAS_LIBRARIES})
-  if(BLAS_INFO STREQUAL "mkl")
-    add_definitions(-DTH_BLAS_MKL)
-  elseif(BLAS_INFO STREQUAL "open")
-    add_definitions(-DTH_BLAS_OPEN)
-  endif()
-endif()
-
-# Link LAPACK when found.
-find_package(LAPACK)
-if(LAPACK_FOUND)
-  set(USE_LAPACK 1)
-  target_link_libraries(TH ${LAPACK_LIBRARIES})
-endif()
-
-IF (UNIX AND NOT APPLE)
- INCLUDE(CheckLibraryExists)
- # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830
- # The third argument is the directory in which to look for the library, not
- # a header name; leave it empty to use the default search path.
- CHECK_LIBRARY_EXISTS(rt clock_gettime "" NEED_LIBRT)
- IF(NEED_LIBRT)
- TARGET_LINK_LIBRARIES(TH rt)
- SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt)
- ENDIF(NEED_LIBRT)
-ENDIF(UNIX AND NOT APPLE)
-
-# Probe optional POSIX functions and expose one HAVE_* define for each hit.
-IF(UNIX)
- SET(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h")
- CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)
- IF(HAVE_MMAP)
- ADD_DEFINITIONS(-DHAVE_MMAP=1)
- ENDIF(HAVE_MMAP)
- # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html
- ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64)
- CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN)
- IF(HAVE_SHM_OPEN)
- ADD_DEFINITIONS(-DHAVE_SHM_OPEN=1)
- ENDIF(HAVE_SHM_OPEN)
- CHECK_FUNCTION_EXISTS(shm_unlink HAVE_SHM_UNLINK)
- IF(HAVE_SHM_UNLINK)
- ADD_DEFINITIONS(-DHAVE_SHM_UNLINK=1)
- ENDIF(HAVE_SHM_UNLINK)
- CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
- IF(HAVE_MALLOC_USABLE_SIZE)
- ADD_DEFINITIONS(-DHAVE_MALLOC_USABLE_SIZE=1)
- ENDIF(HAVE_MALLOC_USABLE_SIZE)
-ENDIF(UNIX)
-
-# Link the math library everywhere except MSVC.
-if(NOT MSVC)
-  target_link_libraries(TH m)
-endif()
-
-# Is __thread supported?
-if(MSVC)
-  check_c_source_compiles("static __declspec( thread ) int x = 1; int main() { return x; }" C_HAS_THREAD)
-else()
-  check_c_source_compiles("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD)
-endif()
-if(C_HAS_THREAD)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTH_HAVE_THREAD")
-else()
-  message(STATUS "Warning: __thread is not supported, generating thread-unsafe code")
-endif()
-
-# THGeneral.h is generated into the build directory, which is added to the
-# include path.
-include_directories("${CMAKE_CURRENT_BINARY_DIR}")
-configure_file(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h")
diff --git a/contrib/lua-torch/torch7/lib/TH/README.md b/contrib/lua-torch/torch7/lib/TH/README.md
deleted file mode 100644
index 4ac26c103..000000000
--- a/contrib/lua-torch/torch7/lib/TH/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-Environment variables control the disabling of certain explicit SIMD optimizations.
-
-```
-x64 options:
-TH_NO_AVX2=1 # disable AVX2 codepaths
-TH_NO_AVX=1 # disable AVX codepaths
-TH_NO_SSE=1 # disable SSE codepaths
-
-ppc64le options:
-TH_NO_VSX=1 # disable VSX codepaths
-```
diff --git a/contrib/lua-torch/torch7/lib/TH/TH.h b/contrib/lua-torch/torch7/lib/TH/TH.h
deleted file mode 100644
index 11f208c4b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/TH.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef TH_INC
-#define TH_INC
-
-#include "THGeneral.h"
-
-#include "THBlas.h"
-#ifdef USE_LAPACK
-#include "THLapack.h"
-#endif
-
-#include "THAtomic.h"
-#include "THVector.h"
-#include "THLogAdd.h"
-#include "THRandom.h"
-#include "THSize.h"
-#include "THStorage.h"
-#include "THTensor.h"
-#include "THTensorApply.h"
-#include "THTensorDimApply.h"
-
-#include "THFile.h"
-#include "THDiskFile.h"
-#include "THMemoryFile.h"
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THAllocator.c b/contrib/lua-torch/torch7/lib/TH/THAllocator.c
deleted file mode 100644
index 51ac69b94..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THAllocator.c
+++ /dev/null
@@ -1,500 +0,0 @@
-#include "THAllocator.h"
-#include "THAtomic.h"
-
-/* stuff for mapped files */
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
-#if HAVE_MMAP
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-/* end of stuff for mapped files */
-
-/* Default allocator hook: forwards to THAlloc; ctx is not used. */
-static void *THDefaultAllocator_alloc(void* ctx, ptrdiff_t size) {
- return THAlloc(size);
-}
-
-/* Default reallocation hook: forwards to THRealloc; ctx is not used. */
-static void *THDefaultAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
- return THRealloc(ptr, size);
-}
-
-/* Default release hook: forwards to THFree; ctx is not used. */
-static void THDefaultAllocator_free(void* ctx, void* ptr) {
- THFree(ptr);
-}
-
-/* Allocator table built from the three default hooks. */
-THAllocator THDefaultAllocator = {
-  .malloc  = THDefaultAllocator_alloc,
-  .realloc = THDefaultAllocator_realloc,
-  .free    = THDefaultAllocator_free
-};
-
-#if defined(_WIN32) || defined(HAVE_MMAP)
-
-/* State describing one file or shared-memory mapping. */
-struct THMapAllocatorContext_ {
- char *filename; /* file name */
- int flags; /* TH_ALLOCATOR_MAPPED_* bits */
- ptrdiff_t size; /* mapped size */
- int fd; /* descriptor; -1 when none is held */
-};
-
-/* Bytes reserved at the start of a refcounted mapping, in front of the
- * pointer handed to the caller. */
-#define TH_ALLOC_ALIGNMENT 64
-
-/* Header stored at the start of a refcounted mapping. */
-typedef struct {
- int refcount;
-} THMapInfo;
-
-/* Placeholder used when a context is created without a file name; it is
- * recognised by address so that it is never passed to THFree. */
-char * unknown_filename = "filename not specified";
-
-/*
- * Create a mapping context for `filename` with the given
- * TH_ALLOCATOR_MAPPED_* flags.  The name is copied; a NULL name selects a
- * shared placeholder string.  Nothing is opened or mapped here: size starts
- * at 0 and fd at -1.
- *
- * NOCREATE is dropped unless SHARED or SHAREDMEM is requested.  EXCLUSIVE
- * without SHARED/SHAREDMEM is reported through THError.
- */
-THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags)
-{
-  THMapAllocatorContext *ctx;
-
-  /* Validate before allocating so that nothing is leaked when THError does
-   * not return. */
-  if (!(flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM))) {
-    /* Test the EXCLUSIVE bit itself: comparing the whole flag word would
-     * miss EXCLUSIVE combined with other non-shared flags. */
-    if (flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)
-      THError("TH_ALLOCATOR_MAPPED_EXCLUSIVE flag requires opening the file "
-              "in shared mode");
-    flags &= ~TH_ALLOCATOR_MAPPED_NOCREATE;
-  }
-
-  ctx = THAlloc(sizeof(*ctx));
-
-  if (filename) {
-    size_t len = strlen(filename) + 1; /* include the terminator */
-    ctx->filename = THAlloc(len);
-    memcpy(ctx->filename, filename, len);
-  } else {
-    ctx->filename = unknown_filename;
-  }
-  ctx->flags = flags;
-  ctx->size = 0;
-  ctx->fd = -1;
-
-  return ctx;
-}
-
-/* Same as THMapAllocatorContext_new, then records `fd` in the new context. */
-THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, int fd, int flags)
-{
-  THMapAllocatorContext *result = THMapAllocatorContext_new(filename, flags);
-
-  result->fd = fd;
-  return result;
-}
-
-/* Returns the file name stored in ctx; the string is not copied. */
-char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx)
-{
- return ctx->filename;
-}
-
-/* Returns the descriptor stored in ctx. */
-int THMapAllocatorContext_fd(THMapAllocatorContext *ctx)
-{
- return ctx->fd;
-}
-
-/* Returns the mapped size stored in ctx. */
-ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx)
-{
- return ctx->size;
-}
-
-/*
- * Release a context and its copy of the file name.  A NULL ctx is ignored.
- * The shared placeholder name is recognised by address and never freed.
- */
-void THMapAllocatorContext_free(THMapAllocatorContext *ctx)
-{
-  if (ctx == NULL)
-    return;
-
-  if (ctx->filename != unknown_filename)
-    THFree(ctx->filename);
-  THFree(ctx);
-}
-
-/*
- * Open and map the file or shared-memory object described by ctx_.
- *
- * size > 0 requests a mapping of `size` bytes: a smaller file is grown when
- * any flag is set in the context and rejected otherwise.  size <= 0 maps the
- * file at its current size.  The mapped size is stored in ctx->size.
- *
- * Failures are reported through THError.  The return value is the mapping
- * address (on Windows, whatever MapViewOfFile returned).
- */
-static void *_map_alloc(void* ctx_, ptrdiff_t size)
-{
-  THMapAllocatorContext *ctx = ctx_;
-  void *data = NULL;
-
-#ifdef _WIN32
-  {
-    HANDLE hfile;
-    HANDLE hmfile;
-    LARGE_INTEGER hfilesz;
-    DWORD err; /* captured before CloseHandle so the code is not overwritten */
-
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)
-      THError("exclusive file mapping is not supported on Windows");
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE)
-      THError("file mapping without creation is not supported on Windows");
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD)
-      THError("TH_ALLOCATOR_MAPPED_KEEPFD not supported on Windows");
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)
-      THError("TH_ALLOCATOR_MAPPED_FROMFD not supported on Windows");
-
-    /* open file */
-    /* FILE_FLAG_RANDOM_ACCESS ? */
-    if(ctx->flags)
-    {
-      hfile = CreateFileA(ctx->filename, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
-      if (hfile == INVALID_HANDLE_VALUE)
-        THError("could not open file <%s> in read-write mode; error code: <%d>", ctx->filename, GetLastError());
-    }
-    else
-    {
-      hfile = CreateFileA(ctx->filename, GENERIC_READ, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
-      if (hfile == INVALID_HANDLE_VALUE)
-        THError("could not open file <%s> in read-only mode; error code: <%d>", ctx->filename, GetLastError());
-    }
-
-    if (GetFileSizeEx(hfile, &hfilesz) == 0)
-    {
-      err = GetLastError();
-      CloseHandle(hfile); /* do not leak the handle on the error path */
-      THError("could not get file size: <%s>; error code: <%d>", ctx->filename, err);
-    }
-
-    if(size > 0)
-    {
-      if(size > hfilesz.QuadPart)
-      {
-        if(ctx->flags)
-        {
-          hfilesz.QuadPart = size;
-          if(SetFilePointerEx(hfile, hfilesz, NULL, FILE_BEGIN) == 0)
-          {
-            err = GetLastError();
-            CloseHandle(hfile);
-            THError("unable to stretch file <%s> to the right size; error code: <%d>", ctx->filename, err);
-          }
-          if(SetEndOfFile(hfile) == 0)
-          {
-            err = GetLastError();
-            CloseHandle(hfile);
-            THError("unable to write to file <%s>; error code: <%d>", ctx->filename, err);
-          }
-        }
-        else
-        {
-          err = GetLastError();
-          CloseHandle(hfile);
-          /* size is a ptrdiff_t: print it through long long, not %ld */
-          THError("file <%s> size is smaller than the required mapping size <%lld>; error code: <%d>", ctx->filename, (long long)size, err);
-        }
-      }
-    }
-    else
-      size = hfilesz.QuadPart;
-
-    ctx->size = size; /* if we are here, it must be the right size */
-
-    hfilesz.QuadPart = ctx->size;
-
-    /* get map handle: read-write when any flag is set, copy-on-write otherwise */
-    hmfile = CreateFileMapping(hfile, NULL, ctx->flags ? PAGE_READWRITE : PAGE_WRITECOPY, hfilesz.HighPart, hfilesz.LowPart, NULL);
-    if (hmfile == NULL)
-    {
-      err = GetLastError();
-      CloseHandle(hfile); /* do not leak the handle on the error path */
-      THError("could not create a map on file <%s>; error code: <%d>", ctx->filename, err);
-    }
-
-    /* map the stuff */
-    data = MapViewOfFile(hmfile, ctx->flags ? FILE_MAP_ALL_ACCESS : FILE_MAP_COPY, 0, 0, 0);
-
-    CloseHandle(hfile);
-    CloseHandle(hmfile);
-  }
-#else /* _WIN32 */
-  {
-    /* open file */
-    int fd;
-    int flags;
-    struct stat file_stat;
-
-    if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM))
-      flags = O_RDWR | O_CREAT;
-    else
-      flags = O_RDONLY;
-
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)
-      flags |= O_EXCL;
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE)
-      flags &= ~O_CREAT;
-
-    if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)) {
-      if(ctx->flags & TH_ALLOCATOR_MAPPED_SHARED)
-      {
-        if((fd = open(ctx->filename, flags, (mode_t)0600)) == -1)
-          THError("unable to open file <%s> in read-write mode", ctx->filename);
-      }
-      else if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)
-      {
-#ifdef HAVE_SHM_OPEN
-        if((fd = shm_open(ctx->filename, flags, (mode_t)0600)) == -1)
-          THError("unable to open shared memory object <%s> in read-write mode", ctx->filename);
-#else
-        THError("unable to open file <%s> in sharedmem mode, shm_open unavailable on this platform", ctx->filename);
-#endif
-      }
-      else
-      {
-        if((fd = open(ctx->filename, O_RDONLY)) == -1)
-          THError("unable to open file <%s> in read-only mode", ctx->filename);
-      }
-    } else {
-      /* the caller supplied the descriptor */
-      fd = ctx->fd;
-    }
-
-    if(fstat(fd, &file_stat) == -1)
-    {
-      if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD))
-        close(fd);
-      THError("unable to stat the file <%s>", ctx->filename);
-    }
-
-    if(size > 0)
-    {
-      if(size > file_stat.st_size)
-      {
-        if(ctx->flags)
-        {
-          if(ftruncate(fd, size) == -1)
-          {
-            close(fd); /* match the sibling error paths: do not leak fd */
-            THError("unable to resize file <%s> to the right size", ctx->filename);
-          }
-          if(fstat(fd, &file_stat) == -1 || file_stat.st_size < size)
-          {
-            close(fd);
-            THError("unable to stretch file <%s> to the right size", ctx->filename);
-          }
-/* on OS X write returns with errno 45 (Operation not supported) when used
- * with a file descriptor obtained via shm_open
- */
-#ifndef __APPLE__
-          /* NOTE(review): this writes at the descriptor's current offset, not
-           * at the new end of the file -- confirm that is intended. */
-          if((write(fd, "", 1)) != 1) /* note that the string "" contains the '\0' byte ... */
-          {
-            close(fd);
-            THError("unable to write to file <%s>", ctx->filename);
-          }
-#endif
-        }
-        else
-        {
-          close(fd);
-          /* size is a ptrdiff_t, so use the 't' length modifier */
-          THError("file <%s> size is smaller than the required mapping size <%td>", ctx->filename, size);
-        }
-      }
-    }
-    else
-      size = file_stat.st_size;
-
-    ctx->size = size; /* if we are here, it must be the right size */
-
-    /* map it */
-    if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM))
-      data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-    else
-      data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
-
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) {
-      ctx->fd = fd;
-    } else {
-      if(close(fd) == -1)
-        THError("Error closing file <%s>", ctx->filename);
-      ctx->fd = -1;
-    }
-
-    if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK) {
-      if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)
-      {
-#ifdef HAVE_SHM_UNLINK
-        if (shm_unlink(ctx->filename) == -1)
-          THError("could not unlink the shared memory file %s", ctx->filename);
-#else
-        THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename);
-#endif
-      }
-      else
-      {
-        if (unlink(ctx->filename) == -1)
-          THError("could not unlink file %s", ctx->filename);
-      }
-    }
-
-    if(data == MAP_FAILED)
-    {
-      data = NULL; /* let's be sure it is NULL */
-      /* the quotient is a ptrdiff_t, so %d would be a type mismatch */
-      THError("$ Torch: unable to mmap memory: you tried to mmap %tdGB.", ctx->size/1073741824);
-    }
-  }
-#endif
-
-  return data;
-}
-
-/* Allocation hook of THMapAllocator: forwards to _map_alloc. */
-static void * THMapAllocator_alloc(void *ctx, ptrdiff_t size) {
- return _map_alloc(ctx, size);
-}
-
-/* Mapped data cannot be resized: always reports an error through THError. */
-static void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
- THError("cannot realloc mapped data");
- return NULL;
-}
-
-/*
- * Release hook of THMapAllocator: unmaps `data` and frees the context.
- * Outside Windows it first closes a descriptor kept with KEEPFD and, after
- * unmapping, unlinks a SHAREDMEM object unless FROMFD or UNLINK is set.
- */
-static void THMapAllocator_free(void* ctx_, void* data) {
-  THMapAllocatorContext *mapping = ctx_;
-
-#ifdef _WIN32
-  if (!UnmapViewOfFile(data))
-    THError("could not unmap the shared memory file");
-#else /* _WIN32 */
-  if ((mapping->flags & TH_ALLOCATOR_MAPPED_KEEPFD) && close(mapping->fd) == -1)
-    THError("could not close file descriptor %d", mapping->fd);
-
-  if (munmap(data, mapping->size) != 0)
-    THError("could not unmap the shared memory file");
-
-  if ((mapping->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) &&
-      !(mapping->flags & (TH_ALLOCATOR_MAPPED_FROMFD | TH_ALLOCATOR_MAPPED_UNLINK)))
-  {
-#ifdef HAVE_SHM_UNLINK
-    if (shm_unlink(mapping->filename) == -1)
-      THError("could not unlink the shared memory file %s", mapping->filename);
-#else
-    THError("could not unlink the shared memory file %s, shm_unlink not available on platform", mapping->filename);
-#endif
-  }
-#endif /* _WIN32 */
-
-  THMapAllocatorContext_free(mapping);
-}
-
-#else
-
-/*
- * Stubs used when neither _WIN32 nor HAVE_MMAP is defined.  Each one reports
- * the missing feature through THError.  A stub exists for every
- * THMapAllocatorContext_* function declared in THAllocator.h, so callers
- * still link on such systems.
- */
-THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) {
-  THError("file mapping not supported on your system");
-  return NULL;
-}
-
-THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, int fd, int flags) {
-  THError("file mapping not supported on your system");
-  return NULL;
-}
-
-char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx) {
-  THError("file mapping not supported on your system");
-  return NULL;
-}
-
-int THMapAllocatorContext_fd(THMapAllocatorContext *ctx) {
-  THError("file mapping not supported on your system");
-  return -1;
-}
-
-ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx) {
-  THError("file mapping not supported on your system");
-  return 0;
-}
-
-void THMapAllocatorContext_free(THMapAllocatorContext *ctx) {
-  THError("file mapping not supported on your system");
-}
-
-static void *THMapAllocator_alloc(void* ctx_, ptrdiff_t size) {
-  THError("file mapping not supported on your system");
-  return NULL;
-}
-
-static void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
-  THError("file mapping not supported on your system");
-  return NULL;
-}
-
-static void THMapAllocator_free(void* ctx, void* data) {
-  THError("file mapping not supported on your system");
-}
-
-#endif
-
-#if (defined(_WIN32) || defined(HAVE_MMAP)) && defined(TH_ATOMIC_IPC_REFCOUNT)
-
-/*
- * Map a shared-memory object with a THMapInfo header in front of the user
- * data and return a pointer just past that header.
- *
- * Requires SHAREDMEM and rejects FROMFD, KEEPFD and UNLINK through THError.
- * With EXCLUSIVE the reference count is set to 1; otherwise it is
- * incremented.  Returns NULL when the mapping routine returns NULL.
- */
-static void * THRefcountedMapAllocator_alloc(void *_ctx, ptrdiff_t size) {
-  THMapAllocatorContext *ctx = _ctx;
-  void *ptr;
-  THMapInfo *map_info;
-
-  if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)
-    THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_FROMFD flag");
-  if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD)
-    THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_KEEPFD flag");
-  if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK)
-    THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_UNLINK flag");
-  if (!(ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM))
-    THError("THRefcountedMapAllocator requires TH_ALLOCATOR_MAPPED_SHAREDMEM flag");
-
-  /* reserve room for the header in front of the caller's bytes */
-  size = size + TH_ALLOC_ALIGNMENT;
-  ptr = _map_alloc(ctx, size);
-  if (ptr == NULL)
-    return NULL; /* nothing was mapped: do not touch the header */
-
-  map_info = (THMapInfo*)ptr;
-  if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)
-    map_info->refcount = 1;
-  else
-    THAtomicIncrementRef(&map_info->refcount);
-
-  return (void*)(((char*)ptr) + TH_ALLOC_ALIGNMENT);
-}
-
-/* Mapped data cannot be resized: always reports an error through THError. */
-static void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
- THError("cannot realloc mapped data");
- return NULL;
-}
-
-/*
- * Release a refcounted mapping and free its context.  `data` is the pointer
- * returned by the matching alloc hook, i.e. TH_ALLOC_ALIGNMENT bytes past the
- * start of the mapping.
- *
- * Outside Windows the reference count is decremented first and the
- * shared-memory object is unlinked when THAtomicDecrementRef returns
- * non-zero.
- */
-static void THRefcountedMapAllocator_free(void* ctx_, void* data) {
-  THMapAllocatorContext *ctx = ctx_;
-  /* start of the mapping: the header sits in front of the user pointer */
-  THMapInfo *info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT);
-
-#ifdef _WIN32
-  /* UnmapViewOfFile needs the base address of the view, not the offset
-   * pointer that was handed to the caller. */
-  if(UnmapViewOfFile(info) == 0)
-    THError("could not unmap the shared memory file");
-#else /* _WIN32 */
-
-  if (THAtomicDecrementRef(&info->refcount)) {
-#ifdef HAVE_SHM_UNLINK
-    if (shm_unlink(ctx->filename) == -1)
-      THError("could not unlink the shared memory file %s", ctx->filename);
-#else
-    THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename);
-#endif /* HAVE_SHM_UNLINK */
-  }
-  if (munmap(info, ctx->size))
-    THError("could not unmap the shared memory file %s", ctx->filename);
-#endif /* _WIN32 */
-
-  THMapAllocatorContext_free(ctx);
-}
-
-/* Increment the reference count stored in the header in front of `data`. */
-void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data)
-{
-  char *base = (char*)data - TH_ALLOC_ALIGNMENT;
-
-  THAtomicIncrementRef(&((THMapInfo*)base)->refcount);
-}
-
-/* Decrement the reference count stored in the header in front of `data` and
- * return the result of THAtomicDecrementRef. */
-int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data)
-{
-  char *base = (char*)data - TH_ALLOC_ALIGNMENT;
-
-  return THAtomicDecrementRef(&((THMapInfo*)base)->refcount);
-}
-
-#else
-
-/* Stubs used when refcounted mappings are compiled out: each one reports the
- * missing feature through THError. */
-static void * THRefcountedMapAllocator_alloc(void *ctx, ptrdiff_t size) {
- THError("refcounted file mapping not supported on your system");
- return NULL;
-}
-
-static void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
- THError("refcounted file mapping not supported on your system");
- return NULL;
-}
-
-static void THRefcountedMapAllocator_free(void* ctx_, void* data) {
- THError("refcounted file mapping not supported on your system");
-}
-
-void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data)
-{
- THError("refcounted file mapping not supported on your system");
-}
-
-int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data)
-{
- THError("refcounted file mapping not supported on your system");
- return 0;
-}
-
-#endif
-
-/* Allocator table for file and shared-memory mappings. */
-THAllocator THMapAllocator = {
-  .malloc  = THMapAllocator_alloc,
-  .realloc = THMapAllocator_realloc,
-  .free    = THMapAllocator_free
-};
-
-/* Allocator table for refcounted shared-memory mappings. */
-THAllocator THRefcountedMapAllocator = {
-  .malloc  = THRefcountedMapAllocator_alloc,
-  .realloc = THRefcountedMapAllocator_realloc,
-  .free    = THRefcountedMapAllocator_free
-};
diff --git a/contrib/lua-torch/torch7/lib/TH/THAllocator.h b/contrib/lua-torch/torch7/lib/TH/THAllocator.h
deleted file mode 100644
index 18fc9ec0a..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THAllocator.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef TH_ALLOCATOR_INC
-#define TH_ALLOCATOR_INC
-
-#include "THGeneral.h"
-
-#define TH_ALLOCATOR_MAPPED_SHARED 1
-#define TH_ALLOCATOR_MAPPED_SHAREDMEM 2
-#define TH_ALLOCATOR_MAPPED_EXCLUSIVE 4
-#define TH_ALLOCATOR_MAPPED_NOCREATE 8
-#define TH_ALLOCATOR_MAPPED_KEEPFD 16
-#define TH_ALLOCATOR_MAPPED_FROMFD 32
-#define TH_ALLOCATOR_MAPPED_UNLINK 64
-
-/* Custom allocator
- */
-typedef struct THAllocator {
- void* (*malloc)(void*, ptrdiff_t);
- void* (*realloc)(void*, void*, ptrdiff_t);
- void (*free)(void*, void*);
-} THAllocator;
-
-/* default malloc/free allocator. malloc and realloc raise an error (using
- * THError) on allocation failure.
- */
-extern THAllocator THDefaultAllocator;
-
-/* file map allocator
- */
-typedef struct THMapAllocatorContext_ THMapAllocatorContext;
-TH_API THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags);
-TH_API THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename,
- int fd, int flags);
-TH_API char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx);
-TH_API int THMapAllocatorContext_fd(THMapAllocatorContext *ctx);
-TH_API ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx);
-TH_API void THMapAllocatorContext_free(THMapAllocatorContext *ctx);
-TH_API void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data);
-TH_API int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data);
-
-extern THAllocator THMapAllocator;
-extern THAllocator THRefcountedMapAllocator;
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THAtomic.c b/contrib/lua-torch/torch7/lib/TH/THAtomic.c
deleted file mode 100644
index 714fc52db..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THAtomic.c
+++ /dev/null
@@ -1,267 +0,0 @@
-#include "THAtomic.h"
-
-/*
- Note: I thank Leon Bottou for his useful comments.
- Ronan.
-*/
-
-#if defined(USE_C11_ATOMICS)
-#include <stdatomic.h>
-#endif
-
-#if defined(USE_MSC_ATOMICS)
-#include <intrin.h>
-#include <assert.h>
-#endif
-
-#if !defined(USE_MSC_ATOMICS) && !defined(USE_GCC_ATOMICS) && defined(USE_PTHREAD_ATOMICS)
-#include <pthread.h>
-static pthread_mutex_t ptm = PTHREAD_MUTEX_INITIALIZER;
-#endif
-
-void THAtomicSet(int volatile *a, int newvalue)
-{
-#if defined(USE_C11_ATOMICS)
- atomic_store(a, newvalue);
-#elif defined(USE_MSC_ATOMICS)
- assert(sizeof(int) == sizeof(long));
- _InterlockedExchange((long*)a, newvalue);
-#elif defined(USE_GCC_ATOMICS)
- __sync_lock_test_and_set(a, newvalue);
-#else
- int oldvalue;
- do {
- oldvalue = *a;
- } while (!THAtomicCompareAndSwap(a, oldvalue, newvalue));
-#endif
-}
-
-int THAtomicGet(int volatile *a)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_load(a);
-#else
- int value;
- do {
- value = *a;
- } while (!THAtomicCompareAndSwap(a, value, value));
- return value;
-#endif
-}
-
-int THAtomicAdd(int volatile *a, int value)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_fetch_add(a, value);
-#elif defined(USE_MSC_ATOMICS)
- assert(sizeof(int) == sizeof(long));
- return _InterlockedExchangeAdd((long*)a, value);
-#elif defined(USE_GCC_ATOMICS)
- return __sync_fetch_and_add(a, value);
-#else
- int oldvalue;
- do {
- oldvalue = *a;
- } while (!THAtomicCompareAndSwap(a, oldvalue, (oldvalue + value)));
- return oldvalue;
-#endif
-}
-
-void THAtomicIncrementRef(int volatile *a)
-{
- THAtomicAdd(a, 1);
-}
-
-int THAtomicDecrementRef(int volatile *a)
-{
- return (THAtomicAdd(a, -1) == 1);
-}
-
-int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_compare_exchange_strong(a, &oldvalue, newvalue);
-#elif defined(USE_MSC_ATOMICS)
- assert(sizeof(int) == sizeof(long));
- return (_InterlockedCompareExchange((long*)a, (long)newvalue, (long)oldvalue) == (long)oldvalue);
-#elif defined(USE_GCC_ATOMICS)
- return __sync_bool_compare_and_swap(a, oldvalue, newvalue);
-#elif defined(USE_PTHREAD_ATOMICS)
- int ret = 0;
- pthread_mutex_lock(&ptm);
- if(*a == oldvalue) {
- *a = newvalue;
- ret = 1;
- }
- pthread_mutex_unlock(&ptm);
- return ret;
-#else
-#warning THAtomic is not thread safe
- if(*a == oldvalue) {
- *a = newvalue;
- return 1;
- }
- else
- return 0;
-#endif
-}
-
-void THAtomicSetLong(long volatile *a, long newvalue)
-{
-#if defined(USE_C11_ATOMICS)
- atomic_store(a, newvalue);
-#elif defined(USE_MSC_ATOMICS)
- _InterlockedExchange(a, newvalue);
-#elif defined(USE_GCC_ATOMICS)
- __sync_lock_test_and_set(a, newvalue);
-#else
- long oldvalue;
- do {
- oldvalue = *a;
- } while (!THAtomicCompareAndSwapLong(a, oldvalue, newvalue));
-#endif
-}
-
-long THAtomicGetLong(long volatile *a)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_load(a);
-#else
- long value;
- do {
- value = *a;
- } while (!THAtomicCompareAndSwapLong(a, value, value));
- return value;
-#endif
-}
-
-long THAtomicAddLong(long volatile *a, long value)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_fetch_add(a, value);
-#elif defined(USE_MSC_ATOMICS)
- return _InterlockedExchangeAdd(a, value);
-#elif defined(USE_GCC_ATOMICS)
- return __sync_fetch_and_add(a, value);
-#else
- long oldvalue;
- do {
- oldvalue = *a;
- } while (!THAtomicCompareAndSwapLong(a, oldvalue, (oldvalue + value)));
- return oldvalue;
-#endif
-}
-
-long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_compare_exchange_strong(a, &oldvalue, newvalue);
-#elif defined(USE_MSC_ATOMICS)
- return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue);
-#elif defined(USE_GCC_ATOMICS)
- return __sync_bool_compare_and_swap(a, oldvalue, newvalue);
-#elif defined(USE_PTHREAD_ATOMICS)
- long ret = 0;
- pthread_mutex_lock(&ptm);
- if(*a == oldvalue) {
- *a = newvalue;
- ret = 1;
- }
- pthread_mutex_unlock(&ptm);
- return ret;
-#else
-#warning THAtomic is not thread safe
- if(*a == oldvalue) {
- *a = newvalue;
- return 1;
- }
- else
- return 0;
-#endif
-}
-
-void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue)
-{
-#if defined(USE_C11_ATOMICS)
- atomic_store(a, newvalue);
-#elif defined(USE_MSC_ATOMICS)
-#ifdef _WIN64
- _InterlockedExchange64(a, newvalue);
-#else
- _InterlockedExchange(a, newvalue);
-#endif
-#elif defined(USE_GCC_ATOMICS)
- __sync_lock_test_and_set(a, newvalue);
-#else
- ptrdiff_t oldvalue;
- do {
- oldvalue = *a;
- } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, newvalue));
-#endif
-}
-
-ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_load(a);
-#else
- ptrdiff_t value;
- do {
- value = *a;
- } while (!THAtomicCompareAndSwapPtrdiff(a, value, value));
- return value;
-#endif
-}
-
-ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_fetch_add(a, value);
-#elif defined(USE_MSC_ATOMICS)
-#ifdef _WIN64
- return _InterlockedExchangeAdd64(a, value);
-#else
- return _InterlockedExchangeAdd(a, value);
-#endif
-#elif defined(USE_GCC_ATOMICS)
- return __sync_fetch_and_add(a, value);
-#else
- ptrdiff_t oldvalue;
- do {
- oldvalue = *a;
- } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, (oldvalue + value)));
- return oldvalue;
-#endif
-}
-
-ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue)
-{
-#if defined(USE_C11_ATOMICS)
- return atomic_compare_exchange_strong(a, &oldvalue, newvalue);
-#elif defined(USE_MSC_ATOMICS)
-#ifdef _WIN64
- return (_InterlockedCompareExchange64(a, newvalue, oldvalue) == oldvalue);
-#else
- return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue);
-#endif
-#elif defined(USE_GCC_ATOMICS)
- return __sync_bool_compare_and_swap(a, oldvalue, newvalue);
-#elif defined(USE_PTHREAD_ATOMICS)
- ptrdiff_t ret = 0;
- pthread_mutex_lock(&ptm);
- if(*a == oldvalue) {
- *a = newvalue;
- ret = 1;
- }
- pthread_mutex_unlock(&ptm);
- return ret;
-#else
-#warning THAtomic is not thread safe
- if(*a == oldvalue) {
- *a = newvalue;
- return 1;
- }
- else
- return 0;
-#endif
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THAtomic.h b/contrib/lua-torch/torch7/lib/TH/THAtomic.h
deleted file mode 100644
index d77b20b24..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THAtomic.h
+++ /dev/null
@@ -1,125 +0,0 @@
-#ifndef TH_ATOMIC_INC
-#define TH_ATOMIC_INC
-
-#include "THGeneral.h"
-
-/******************************************************************************
- * Atomic operations for TH
- * Five backends are integrated:
- * - C11 atomic operations
- * - MSVC intrinsics
- * - GCC intrinsics
- * - Pthread if none of the above is available
- * - Unsafe mode in none of the above is available
- ******************************************************************************/
-
-
-/******************************************************************************
- * all-purpose functions
- ******************************************************************************/
-
-/*
- * *a = newvalue
-*/
-TH_API void THAtomicSet(int volatile *a, int newvalue);
-
-/*
- * return *a
-*/
-TH_API int THAtomicGet(int volatile *a);
-
-/*
- * *a += value,
- * return previous *a
-*/
-TH_API int THAtomicAdd(int volatile *a, int value);
-
-/*
- * check if (*a == oldvalue)
- * if true: set *a to newvalue, return 1
- * if false: return 0
-*/
-TH_API int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue);
-
-
-/******************************************************************************
- * refcounting functions
- ******************************************************************************/
-
-/*
- * *a++
-*/
-TH_API void THAtomicIncrementRef(int volatile *a);
-
-/*
- * *a--,
- * return 1 if *a == 0 after the operation, 0 otherwise
-*/
-TH_API int THAtomicDecrementRef(int volatile *a);
-
-
-
-/******************************************************************************
- * functions for long type
- ******************************************************************************/
-
-/*
- * *a = newvalue
-*/
-TH_API void THAtomicSetLong(long volatile *a, long newvalue);
-
-/*
- * return *a
-*/
-TH_API long THAtomicGetLong(long volatile *a);
-
-/*
- * *a += value,
- * return previous *a
-*/
-TH_API long THAtomicAddLong(long volatile *a, long value);
-
-/*
- * check if (*a == oldvalue)
- * if true: set *a to newvalue, return 1
- * if false: return 0
-*/
-TH_API long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue);
-
-
-
-/******************************************************************************
- * functions for ptrdiff_t type
- ******************************************************************************/
-
-/*
- * *a = newvalue
-*/
-TH_API void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue);
-
-/*
- * return *a
-*/
-TH_API ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a);
-
-/*
- * *a += value,
- * return previous *a
-*/
-TH_API ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value);
-
-/*
- * check if (*a == oldvalue)
- * if true: set *a to newvalue, return 1
- * if false: return 0
-*/
-TH_API ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue);
-
-#if defined(USE_C11_ATOMICS) && defined(ATOMIC_INT_LOCK_FREE) && \
- ATOMIC_INT_LOCK_FREE == 2
-#define TH_ATOMIC_IPC_REFCOUNT 1
-#elif defined(USE_MSC_ATOMICS) || defined(USE_GCC_ATOMICS)
-#define TH_ATOMIC_IPC_REFCOUNT 1
-#endif
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THBlas.c b/contrib/lua-torch/torch7/lib/TH/THBlas.c
deleted file mode 100644
index 35618b26a..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THBlas.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "THBlas.h"
-
-#include "generic/THBlas.c"
-#include "THGenerateAllTypes.h"
diff --git a/contrib/lua-torch/torch7/lib/TH/THBlas.h b/contrib/lua-torch/torch7/lib/TH/THBlas.h
deleted file mode 100644
index 5fef0febc..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THBlas.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef TH_BLAS_INC
-#define TH_BLAS_INC
-
-#include "THGeneral.h"
-
-#define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME)
-
-#include "generic/THBlas.h"
-#include "THGenerateAllTypes.h"
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in b/contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in
deleted file mode 100644
index 306cd878b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in
+++ /dev/null
@@ -1,9 +0,0 @@
-# Find the TH includes and library
-#
-# TH_INCLUDE_DIR -- where to find the includes
-# TH_LIBRARIES -- list of libraries to link against
-# TH_FOUND -- set to 1 if found
-
-SET(TH_FOUND 1)
-SET(TH_INCLUDE_DIR "@TH_INCLUDE_DIR@")
-SET(TH_LIBRARIES "@TH_LIBRARIES@")
diff --git a/contrib/lua-torch/torch7/lib/TH/THDiskFile.c b/contrib/lua-torch/torch7/lib/TH/THDiskFile.c
deleted file mode 100644
index 3f57b3b35..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THDiskFile.c
+++ /dev/null
@@ -1,797 +0,0 @@
-#include "THGeneral.h"
-#include "THDiskFile.h"
-#include "THFilePrivate.h"
-
-#include <stdint.h>
-#ifndef LLONG_MAX
-#define LLONG_MAX 9223372036854775807LL
-#endif
-
-typedef struct THDiskFile__
-{
- THFile file;
-
- FILE *handle;
- char *name;
- int isNativeEncoding;
- int longSize;
-
-} THDiskFile;
-
-static int THDiskFile_isOpened(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)self;
- return (dfself->handle != NULL);
-}
-
-const char *THDiskFile_name(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)self;
- return dfself->name;
-}
-
-/* workaround mac osx lion ***insane*** fread bug */
-#ifdef __APPLE__
-size_t fread__(void *ptr, size_t size, size_t nitems, FILE *stream)
-{
- size_t nread = 0;
- while(!feof(stream) && !ferror(stream) && (nread < nitems))
- nread += fread((char*)ptr+nread*size, size, THMin(2147483648/size, nitems-nread), stream);
- return nread;
-}
-#else
-#define fread__ fread
-#endif
-
-#define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM) \
- static size_t THDiskFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \
- { \
- THDiskFile *dfself = (THDiskFile*)(self); \
- size_t nread = 0L; \
- \
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); \
- THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); \
- \
- if(dfself->file.isBinary) \
- { \
- nread = fread__(data, sizeof(TYPE), n, dfself->handle); \
- if(!dfself->isNativeEncoding && (sizeof(TYPE) > 1) && (nread > 0)) \
- THDiskFile_reverseMemory(data, data, sizeof(TYPE), nread); \
- } \
- else \
- { \
- size_t i; \
- for(i = 0; i < n; i++) \
- { \
- ASCII_READ_ELEM; /* increment here result and break if wrong */ \
- } \
- if(dfself->file.isAutoSpacing && (n > 0)) \
- { \
- int c = fgetc(dfself->handle); \
- if( (c != '\n') && (c != EOF) ) \
- ungetc(c, dfself->handle); \
- } \
- } \
- \
- if(nread != n) \
- { \
- dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */ \
- if(!dfself->file.isQuiet) \
- THError("read error: read %d blocks instead of %d", nread, n); \
- } \
- \
- return nread; \
- } \
- \
- static size_t THDiskFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \
- { \
- THDiskFile *dfself = (THDiskFile*)(self); \
- size_t nwrite = 0L; \
- \
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); \
- THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); \
- \
- if(dfself->file.isBinary) \
- { \
- if(dfself->isNativeEncoding) \
- { \
- nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle); \
- } \
- else \
- { \
- if(sizeof(TYPE) > 1) \
- { \
- char *buffer = THAlloc(sizeof(TYPE)*n); \
- THDiskFile_reverseMemory(buffer, data, sizeof(TYPE), n); \
- nwrite = fwrite(buffer, sizeof(TYPE), n, dfself->handle); \
- THFree(buffer); \
- } \
- else \
- nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle); \
- } \
- } \
- else \
- { \
- size_t i; \
- for(i = 0; i < n; i++) \
- { \
- ASCII_WRITE_ELEM; \
- if( dfself->file.isAutoSpacing && (i < n-1) ) \
- fprintf(dfself->handle, " "); \
- } \
- if(dfself->file.isAutoSpacing && (n > 0)) \
- fprintf(dfself->handle, "\n"); \
- } \
- \
- if(nwrite != n) \
- { \
- dfself->file.hasError = 1; \
- if(!dfself->file.isQuiet) \
- THError("write error: wrote %d blocks instead of %d", nwrite, n); \
- } \
- \
- return nwrite; \
-}
-
-static int THDiskFile_mode(const char *mode, int *isReadable, int *isWritable)
-{
- *isReadable = 0;
- *isWritable = 0;
- if(strlen(mode) == 1)
- {
- if(*mode == 'r')
- {
- *isReadable = 1;
- return 1;
- }
- else if(*mode == 'w')
- {
- *isWritable = 1;
- return 1;
- }
- }
- else if(strlen(mode) == 2)
- {
- if(mode[0] == 'r' && mode[1] == 'w')
- {
- *isReadable = 1;
- *isWritable = 1;
- return 1;
- }
- }
- return 0;
-}
-
-static void THDiskFile_synchronize(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- fflush(dfself->handle);
-}
-
-static void THDiskFile_seek(THFile *self, size_t position)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
-
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
-
-#if defined(_WIN64)
- THArgCheck(position <= (size_t)INT64_MAX, 2, "position must be smaller than INT64_MAX");
- if(_fseeki64(dfself->handle, (__int64)position, SEEK_SET) < 0)
-#elif defined(_WIN32)
- THArgCheck(position <= (size_t)LONG_MAX, 2, "position must be smaller than LONG_MAX");
- if(fseek(dfself->handle, (long)position, SEEK_SET) < 0)
-#else
- THArgCheck(position <= (size_t)LLONG_MAX, 2, "position must be smaller than LLONG_MAX");
- if(fseeko(dfself->handle, (off_t)position, SEEK_SET) < 0)
-#endif
- {
- dfself->file.hasError = 1;
- if(!dfself->file.isQuiet)
- THError("unable to seek to position %zu", position);
- }
-}
-
-static void THDiskFile_seekEnd(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
-
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
-
-#if defined(_WIN64)
- if(_fseeki64(dfself->handle, 0, SEEK_END) < 0)
-#elif defined(_WIN32)
- if(fseek(dfself->handle, 0, SEEK_END) < 0)
-#else
- if(fseeko(dfself->handle, 0, SEEK_END) < 0)
-#endif
- {
- dfself->file.hasError = 1;
- if(!dfself->file.isQuiet)
- THError("unable to seek at end of file");
- }
-}
-
-static size_t THDiskFile_position(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
-
-#if defined(_WIN64)
- __int64 offset = _ftelli64(dfself->handle);
-#elif defined(_WIN32)
- long offset = ftell(dfself->handle);
-#else
- off_t offset = ftello(dfself->handle);
-#endif
- if (offset > -1)
- return (size_t)offset;
- else if(!dfself->file.isQuiet)
- THError("unable to obtain disk file offset (maybe a long overflow occurred)");
-
- return 0;
-}
-
-static void THDiskFile_close(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- fclose(dfself->handle);
- dfself->handle = NULL;
-}
-
-/* Little and Big Endian */
-
-static void THDiskFile_reverseMemory(void *dst, const void *src, size_t blockSize, size_t numBlocks)
-{
- if(blockSize > 1)
- {
- size_t halfBlockSize = blockSize/2;
- char *charSrc = (char*)src;
- char *charDst = (char*)dst;
- size_t b, i;
- for(b = 0; b < numBlocks; b++)
- {
- for(i = 0; i < halfBlockSize; i++)
- {
- char z = charSrc[i];
- charDst[i] = charSrc[blockSize-1-i];
- charDst[blockSize-1-i] = z;
- }
- charSrc += blockSize;
- charDst += blockSize;
- }
- }
-}
-
-int THDiskFile_isLittleEndianCPU(void)
-{
- int x = 7;
- char *ptr = (char *)&x;
-
- if(ptr[0] == 0)
- return 0;
- else
- return 1;
-}
-
-int THDiskFile_isBigEndianCPU(void)
-{
- return(!THDiskFile_isLittleEndianCPU());
-}
-
-void THDiskFile_nativeEndianEncoding(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- dfself->isNativeEncoding = 1;
-}
-
-void THDiskFile_littleEndianEncoding(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- dfself->isNativeEncoding = THDiskFile_isLittleEndianCPU();
-}
-
-void THDiskFile_bigEndianEncoding(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- dfself->isNativeEncoding = !THDiskFile_isLittleEndianCPU();
-}
-
-/* End of Little and Big Endian Stuff */
-
-void THDiskFile_longSize(THFile *self, int size)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- THArgCheck(size == 0 || size == 4 || size == 8, 1, "Invalid long size specified");
- dfself->longSize = size;
-}
-
-void THDiskFile_noBuffer(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- if (setvbuf(dfself->handle, NULL, _IONBF, 0)) {
- THError("error: cannot disable buffer");
- }
-}
-
-static void THDiskFile_free(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- if(dfself->handle)
- fclose(dfself->handle);
- THFree(dfself->name);
- THFree(dfself);
-}
-
-/* READ_WRITE_METHODS(int, Bool, */
-/* int value = 0; int ret = fscanf(file->handle, "%d", &value); array[i] = (value ? 1 : 0); if(ret <= 0) break; else result++, */
-/* int value = (array[i] ? 1 : 0); nElemWritten = fprintf(file->handle, "%d", value), */
-/* true) */
-
-/* Note that we do a trick */
-READ_WRITE_METHODS(unsigned char, Byte,
- nread = fread(data, 1, n, dfself->handle); break,
- nwrite = fwrite(data, 1, n, dfself->handle); break)
-
-READ_WRITE_METHODS(char, Char,
- nread = fread(data, 1, n, dfself->handle); break,
- nwrite = fwrite(data, 1, n, dfself->handle); break)
-
-READ_WRITE_METHODS(short, Short,
- int ret = fscanf(dfself->handle, "%hd", &data[i]); if(ret <= 0) break; else nread++,
- int ret = fprintf(dfself->handle, "%hd", data[i]); if(ret <= 0) break; else nwrite++)
-
-READ_WRITE_METHODS(int, Int,
- int ret = fscanf(dfself->handle, "%d", &data[i]); if(ret <= 0) break; else nread++,
- int ret = fprintf(dfself->handle, "%d", data[i]); if(ret <= 0) break; else nwrite++)
-
-READ_WRITE_METHODS(float, Float,
- int ret = fscanf(dfself->handle, "%g", &data[i]); if(ret <= 0) break; else nread++,
- int ret = fprintf(dfself->handle, "%.9g", data[i]); if(ret <= 0) break; else nwrite++)
-
-READ_WRITE_METHODS(THHalf, Half,
- float buf; int ret = fscanf(dfself->handle, "%g", &buf); if(ret <= 0) break; else { data[i]= TH_float2half(buf); nread++; },
- int ret = fprintf(dfself->handle, "%.9g", TH_half2float(data[i])); if(ret <= 0) break; else nwrite++)
-
-READ_WRITE_METHODS(double, Double,
- int ret = fscanf(dfself->handle, "%lg", &data[i]); if(ret <= 0) break; else nread++,
- int ret = fprintf(dfself->handle, "%.17g", data[i]); if(ret <= 0) break; else nwrite++)
-
-
-/* For Long we need to rewrite everything, because of the special management of longSize */
-static size_t THDiskFile_readLong(THFile *self, long *data, size_t n)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- size_t nread = 0L;
-
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file");
-
- if(dfself->file.isBinary)
- {
- if(dfself->longSize == 0 || dfself->longSize == sizeof(long))
- {
- nread = fread__(data, sizeof(long), n, dfself->handle);
- if(!dfself->isNativeEncoding && (sizeof(long) > 1) && (nread > 0))
- THDiskFile_reverseMemory(data, data, sizeof(long), nread);
- } else if(dfself->longSize == 4)
- {
- nread = fread__(data, 4, n, dfself->handle);
- if(!dfself->isNativeEncoding && (nread > 0))
- THDiskFile_reverseMemory(data, data, 4, nread);
- size_t i;
- for(i = nread; i > 0; i--)
- data[i-1] = ((int *)data)[i-1];
- }
- else /* if(dfself->longSize == 8) */
- {
- int big_endian = !THDiskFile_isLittleEndianCPU();
- int32_t *buffer = THAlloc(8*n);
- nread = fread__(buffer, 8, n, dfself->handle);
- size_t i;
- for(i = nread; i > 0; i--)
- data[i-1] = buffer[2*(i-1) + big_endian];
- THFree(buffer);
- if(!dfself->isNativeEncoding && (nread > 0))
- THDiskFile_reverseMemory(data, data, 4, nread);
- }
- }
- else
- {
- size_t i;
- for(i = 0; i < n; i++)
- {
- int ret = fscanf(dfself->handle, "%ld", &data[i]); if(ret <= 0) break; else nread++;
- }
- if(dfself->file.isAutoSpacing && (n > 0))
- {
- int c = fgetc(dfself->handle);
- if( (c != '\n') && (c != EOF) )
- ungetc(c, dfself->handle);
- }
- }
-
- if(nread != n)
- {
- dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */
- if(!dfself->file.isQuiet)
- THError("read error: read %d blocks instead of %d", nread, n);
- }
-
- return nread;
-}
-
-static size_t THDiskFile_writeLong(THFile *self, long *data, size_t n)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- size_t nwrite = 0L;
-
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file");
-
- if(dfself->file.isBinary)
- {
- if(dfself->longSize == 0 || dfself->longSize == sizeof(long))
- {
- if(dfself->isNativeEncoding)
- {
- nwrite = fwrite(data, sizeof(long), n, dfself->handle);
- }
- else
- {
- char *buffer = THAlloc(sizeof(long)*n);
- THDiskFile_reverseMemory(buffer, data, sizeof(long), n);
- nwrite = fwrite(buffer, sizeof(long), n, dfself->handle);
- THFree(buffer);
- }
- } else if(dfself->longSize == 4)
- {
- int32_t *buffer = THAlloc(4*n);
- size_t i;
- for(i = 0; i < n; i++)
- buffer[i] = data[i];
- if(!dfself->isNativeEncoding)
- THDiskFile_reverseMemory(buffer, buffer, 4, n);
- nwrite = fwrite(buffer, 4, n, dfself->handle);
- THFree(buffer);
- }
- else /* if(dfself->longSize == 8) */
- {
- int big_endian = !THDiskFile_isLittleEndianCPU();
- int32_t *buffer = THAlloc(8*n);
- size_t i;
- for(i = 0; i < n; i++)
- {
- buffer[2*i + !big_endian] = 0;
- buffer[2*i + big_endian] = data[i];
- }
- if(!dfself->isNativeEncoding)
- THDiskFile_reverseMemory(buffer, buffer, 8, n);
- nwrite = fwrite(buffer, 8, n, dfself->handle);
- THFree(buffer);
- }
- }
- else
- {
- size_t i;
- for(i = 0; i < n; i++)
- {
- int ret = fprintf(dfself->handle, "%ld", data[i]); if(ret <= 0) break; else nwrite++;
- if( dfself->file.isAutoSpacing && (i < n-1) )
- fprintf(dfself->handle, " ");
- }
- if(dfself->file.isAutoSpacing && (n > 0))
- fprintf(dfself->handle, "\n");
- }
-
- if(nwrite != n)
- {
- dfself->file.hasError = 1;
- if(!dfself->file.isQuiet)
- THError("write error: wrote %d blocks instead of %d", nwrite, n);
- }
-
- return nwrite;
-}
-
-static size_t THDiskFile_readString(THFile *self, const char *format, char **str_)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file");
- THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, "format must be '*a' or '*l'");
-
-/* note: the string won't survive long, as it is copied into lua */
-/* so 1024 is not that big... */
-#define TBRS_BSZ 1024L
-
- if(format[1] == 'a')
- {
- char *p = THAlloc(TBRS_BSZ);
- size_t total = TBRS_BSZ;
- size_t pos = 0;
-
- for (;;)
- {
- if(total-pos == 0) /* we need more space! */
- {
- total += TBRS_BSZ;
- p = THRealloc(p, total);
- }
- pos += fread(p+pos, 1, total-pos, dfself->handle);
- if (pos < total) /* eof? */
- {
- if(pos == 0)
- {
- THFree(p);
- dfself->file.hasError = 1;
- if(!dfself->file.isQuiet)
- THError("read error: read 0 blocks instead of 1");
-
- *str_ = NULL;
- return 0;
- }
- *str_ = p;
- return pos;
- }
- }
- }
- else
- {
- char *p = THAlloc(TBRS_BSZ);
- size_t total = TBRS_BSZ;
- size_t pos = 0;
- size_t size;
-
- for (;;)
- {
- if(total-pos <= 1) /* we can only write '\0' in there! */
- {
- total += TBRS_BSZ;
- p = THRealloc(p, total);
- }
- if (fgets(p+pos, total-pos, dfself->handle) == NULL) /* eof? */
- {
- if(pos == 0)
- {
- THFree(p);
- dfself->file.hasError = 1;
- if(!dfself->file.isQuiet)
- THError("read error: read 0 blocks instead of 1");
-
- *str_ = NULL;
- return 0;
- }
- *str_ = p;
- return pos;
- }
- size = strlen(p+pos);
- if (size == 0 || (p+pos)[size-1] != '\n')
- {
- pos += size;
- }
- else
- {
- pos += size-1; /* do not include `eol' */
- *str_ = p;
- return pos;
- }
- }
- }
-
- *str_ = NULL;
- return 0;
-}
-
-
-static size_t THDiskFile_writeString(THFile *self, const char *str, size_t size)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- size_t nwrite;
-
- THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file");
- THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file");
-
- nwrite = fwrite(str, 1, size, dfself->handle);
- if(nwrite != size)
- {
- dfself->file.hasError = 1;
- if(!dfself->file.isQuiet)
- THError("write error: wrote %zu blocks instead of %zu", nwrite, size);
- }
-
- return nwrite;
-}
-
-THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet)
-{
- static struct THFileVTable vtable = {
- THDiskFile_isOpened,
-
- THDiskFile_readByte,
- THDiskFile_readChar,
- THDiskFile_readShort,
- THDiskFile_readInt,
- THDiskFile_readLong,
- THDiskFile_readFloat,
- THDiskFile_readDouble,
- THDiskFile_readHalf,
- THDiskFile_readString,
-
- THDiskFile_writeByte,
- THDiskFile_writeChar,
- THDiskFile_writeShort,
- THDiskFile_writeInt,
- THDiskFile_writeLong,
- THDiskFile_writeFloat,
- THDiskFile_writeDouble,
- THDiskFile_writeHalf,
- THDiskFile_writeString,
-
- THDiskFile_synchronize,
- THDiskFile_seek,
- THDiskFile_seekEnd,
- THDiskFile_position,
- THDiskFile_close,
- THDiskFile_free
- };
-
- int isReadable;
- int isWritable;
- FILE *handle;
- THDiskFile *self;
-
- THArgCheck(THDiskFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'");
-
- if( isReadable && isWritable )
- {
- handle = fopen(name, "r+b");
- if(!handle)
- {
- handle = fopen(name, "wb");
- if(handle)
- {
- fclose(handle);
- handle = fopen(name, "r+b");
- }
- }
- }
- else
- handle = fopen(name, (isReadable ? "rb" : "wb"));
-
- if(!handle)
- {
- if(isQuiet)
- return 0;
- else
- THError("cannot open <%s> in mode %c%c", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' '));
- }
-
- self = THAlloc(sizeof(THDiskFile));
-
- self->handle = handle;
- self->name = THAlloc(strlen(name)+1);
- strcpy(self->name, name);
- self->isNativeEncoding = 1;
- self->longSize = 0;
-
- self->file.vtable = &vtable;
- self->file.isQuiet = isQuiet;
- self->file.isReadable = isReadable;
- self->file.isWritable = isWritable;
- self->file.isBinary = 0;
- self->file.isAutoSpacing = 1;
- self->file.hasError = 0;
-
- return (THFile*)self;
-}
-
-/* PipeFile */
-
-static int THPipeFile_mode(const char *mode, int *isReadable, int *isWritable)
-{
- *isReadable = 0;
- *isWritable = 0;
- if(strlen(mode) == 1)
- {
- if(*mode == 'r')
- {
- *isReadable = 1;
- return 1;
- }
- else if(*mode == 'w')
- {
- *isWritable = 1;
- return 1;
- }
- }
- return 0;
-}
-
-static void THPipeFile_free(THFile *self)
-{
- THDiskFile *dfself = (THDiskFile*)(self);
- if(dfself->handle)
- pclose(dfself->handle);
- THFree(dfself->name);
- THFree(dfself);
-}
-
-THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet)
-{
- static struct THFileVTable vtable = {
- THDiskFile_isOpened,
-
- THDiskFile_readByte,
- THDiskFile_readChar,
- THDiskFile_readShort,
- THDiskFile_readInt,
- THDiskFile_readLong,
- THDiskFile_readFloat,
- THDiskFile_readDouble,
- THDiskFile_readHalf,
- THDiskFile_readString,
-
- THDiskFile_writeByte,
- THDiskFile_writeChar,
- THDiskFile_writeShort,
- THDiskFile_writeInt,
- THDiskFile_writeLong,
- THDiskFile_writeFloat,
- THDiskFile_writeDouble,
- THDiskFile_writeHalf,
- THDiskFile_writeString,
-
- THDiskFile_synchronize,
- THDiskFile_seek,
- THDiskFile_seekEnd,
- THDiskFile_position,
- THDiskFile_close,
- THPipeFile_free
- };
-
- int isReadable;
- int isWritable;
- FILE *handle;
- THDiskFile *self;
-
- THArgCheck(THPipeFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w'");
-
-#ifdef _WIN32
- handle = _popen(name, (isReadable ? "rb" : "wb"));
-#else
- handle = popen(name, (isReadable ? "r" : "w"));
-#endif
-
- if(!handle)
- {
- if(isQuiet)
- return 0;
- else
- THError("cannot open <%s> in mode %c%c. This might be because eg the executable doesn't exist, but it could also be because you are out of memory.", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' '));
- }
-
- self = THAlloc(sizeof(THDiskFile));
-
- self->handle = handle;
- self->name = THAlloc(strlen(name)+1);
- strcpy(self->name, name);
- self->isNativeEncoding = 1;
- self->longSize = 0;
-
- self->file.vtable = &vtable;
- self->file.isQuiet = isQuiet;
- self->file.isReadable = isReadable;
- self->file.isWritable = isWritable;
- self->file.isBinary = 0;
- self->file.isAutoSpacing = 1;
- self->file.hasError = 0;
-
- return (THFile*)self;
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THDiskFile.h b/contrib/lua-torch/torch7/lib/TH/THDiskFile.h
deleted file mode 100644
index bc5c001c7..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THDiskFile.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef TH_DISK_FILE_INC
-#define TH_DISK_FILE_INC
-
-#include "THFile.h"
-
-TH_API THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet);
-TH_API THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet);
-
-TH_API const char *THDiskFile_name(THFile *self);
-
-TH_API int THDiskFile_isLittleEndianCPU(void);
-TH_API int THDiskFile_isBigEndianCPU(void);
-TH_API void THDiskFile_nativeEndianEncoding(THFile *self);
-TH_API void THDiskFile_littleEndianEncoding(THFile *self);
-TH_API void THDiskFile_bigEndianEncoding(THFile *self);
-TH_API void THDiskFile_longSize(THFile *self, int size);
-TH_API void THDiskFile_noBuffer(THFile *self);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THFile.c b/contrib/lua-torch/torch7/lib/TH/THFile.c
deleted file mode 100644
index 3717b7b5c..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THFile.c
+++ /dev/null
@@ -1,157 +0,0 @@
-#include "THFile.h"
-#include "THFilePrivate.h"
-
-#define IMPLEMENT_THFILE_RW(TYPEC, TYPE) \
- size_t THFile_read##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \
- { \
- return (*self->vtable->read##TYPEC)(self, data, n); \
- } \
- \
- size_t THFile_write##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \
- { \
- return (*self->vtable->write##TYPEC)(self, data, n); \
- }
-
-IMPLEMENT_THFILE_RW(Byte, unsigned char)
-IMPLEMENT_THFILE_RW(Char, char)
-IMPLEMENT_THFILE_RW(Short, short)
-IMPLEMENT_THFILE_RW(Int, int)
-IMPLEMENT_THFILE_RW(Long, long)
-IMPLEMENT_THFILE_RW(Float, float)
-IMPLEMENT_THFILE_RW(Double, double)
-IMPLEMENT_THFILE_RW(Half, THHalf)
-
-size_t THFile_readStringRaw(THFile *self, const char *format, char **str_)
-{
- return self->vtable->readString(self, format, str_);
-}
-
-size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size)
-{
- return self->vtable->writeString(self, str, size);
-}
-
-void THFile_synchronize(THFile *self)
-{
- self->vtable->synchronize(self);
-}
-
-void THFile_seek(THFile *self, size_t position)
-{
- self->vtable->seek(self, position);
-}
-
-void THFile_seekEnd(THFile *self)
-{
- self->vtable->seekEnd(self);
-}
-
-size_t THFile_position(THFile *self)
-{
- return self->vtable->position(self);
-}
-
-void THFile_close(THFile *self)
-{
- self->vtable->close(self);
-}
-
-void THFile_free(THFile *self)
-{
- self->vtable->free(self);
-}
-
-int THFile_isOpened(THFile *self)
-{
- return self->vtable->isOpened(self);
-}
-
-#define IMPLEMENT_THFILE_FLAGS(FLAG) \
- int THFile_##FLAG(THFile *self) \
- { \
- return self->FLAG; \
- }
-
-IMPLEMENT_THFILE_FLAGS(isQuiet)
-IMPLEMENT_THFILE_FLAGS(isReadable)
-IMPLEMENT_THFILE_FLAGS(isWritable)
-IMPLEMENT_THFILE_FLAGS(isBinary)
-IMPLEMENT_THFILE_FLAGS(isAutoSpacing)
-IMPLEMENT_THFILE_FLAGS(hasError)
-
-void THFile_binary(THFile *self)
-{
- self->isBinary = 1;
-}
-
-void THFile_ascii(THFile *self)
-{
- self->isBinary = 0;
-}
-
-void THFile_autoSpacing(THFile *self)
-{
- self->isAutoSpacing = 1;
-}
-
-void THFile_noAutoSpacing(THFile *self)
-{
- self->isAutoSpacing = 0;
-}
-
-void THFile_quiet(THFile *self)
-{
- self->isQuiet = 1;
-}
-
-void THFile_pedantic(THFile *self)
-{
- self->isQuiet = 0;
-}
-
-void THFile_clearError(THFile *self)
-{
- self->hasError = 0;
-}
-
-#define IMPLEMENT_THFILE_SCALAR(TYPEC, TYPE) \
- TYPE THFile_read##TYPEC##Scalar(THFile *self) \
- { \
- TYPE scalar; \
- THFile_read##TYPEC##Raw(self, &scalar, 1); \
- return scalar; \
- } \
- \
- void THFile_write##TYPEC##Scalar(THFile *self, TYPE scalar) \
- { \
- THFile_write##TYPEC##Raw(self, &scalar, 1); \
- }
-
-IMPLEMENT_THFILE_SCALAR(Byte, unsigned char)
-IMPLEMENT_THFILE_SCALAR(Char, char)
-IMPLEMENT_THFILE_SCALAR(Short, short)
-IMPLEMENT_THFILE_SCALAR(Int, int)
-IMPLEMENT_THFILE_SCALAR(Long, long)
-IMPLEMENT_THFILE_SCALAR(Float, float)
-IMPLEMENT_THFILE_SCALAR(Double, double)
-IMPLEMENT_THFILE_SCALAR(Half, THHalf)
-
-#define IMPLEMENT_THFILE_STORAGE(TYPEC, TYPE) \
- size_t THFile_read##TYPEC(THFile *self, TH##TYPEC##Storage *storage) \
- { \
- return THFile_read##TYPEC##Raw(self, storage->data, storage->size); \
- } \
- \
- size_t THFile_write##TYPEC(THFile *self, TH##TYPEC##Storage *storage) \
- { \
- return THFile_write##TYPEC##Raw(self, storage->data, storage->size); \
- }
-
-IMPLEMENT_THFILE_STORAGE(Byte, unsigned char)
-IMPLEMENT_THFILE_STORAGE(Char, char)
-IMPLEMENT_THFILE_STORAGE(Short, short)
-IMPLEMENT_THFILE_STORAGE(Int, int)
-IMPLEMENT_THFILE_STORAGE(Long, long)
-IMPLEMENT_THFILE_STORAGE(Float, float)
-IMPLEMENT_THFILE_STORAGE(Double, double)
-IMPLEMENT_THFILE_STORAGE(Half, THHalf)
diff --git a/contrib/lua-torch/torch7/lib/TH/THFile.h b/contrib/lua-torch/torch7/lib/TH/THFile.h
deleted file mode 100644
index e097bdf34..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THFile.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef TH_FILE_INC
-#define TH_FILE_INC
-
-#include "THStorage.h"
-
-typedef struct THFile__ THFile;
-
-TH_API int THFile_isOpened(THFile *self);
-TH_API int THFile_isQuiet(THFile *self);
-TH_API int THFile_isReadable(THFile *self);
-TH_API int THFile_isWritable(THFile *self);
-TH_API int THFile_isBinary(THFile *self);
-TH_API int THFile_isAutoSpacing(THFile *self);
-TH_API int THFile_hasError(THFile *self);
-
-TH_API void THFile_binary(THFile *self);
-TH_API void THFile_ascii(THFile *self);
-TH_API void THFile_autoSpacing(THFile *self);
-TH_API void THFile_noAutoSpacing(THFile *self);
-TH_API void THFile_quiet(THFile *self);
-TH_API void THFile_pedantic(THFile *self);
-TH_API void THFile_clearError(THFile *self);
-
-/* scalar */
-TH_API unsigned char THFile_readByteScalar(THFile *self);
-TH_API char THFile_readCharScalar(THFile *self);
-TH_API short THFile_readShortScalar(THFile *self);
-TH_API int THFile_readIntScalar(THFile *self);
-TH_API long THFile_readLongScalar(THFile *self);
-TH_API float THFile_readFloatScalar(THFile *self);
-TH_API double THFile_readDoubleScalar(THFile *self);
-
-TH_API void THFile_writeByteScalar(THFile *self, unsigned char scalar);
-TH_API void THFile_writeCharScalar(THFile *self, char scalar);
-TH_API void THFile_writeShortScalar(THFile *self, short scalar);
-TH_API void THFile_writeIntScalar(THFile *self, int scalar);
-TH_API void THFile_writeLongScalar(THFile *self, long scalar);
-TH_API void THFile_writeFloatScalar(THFile *self, float scalar);
-TH_API void THFile_writeDoubleScalar(THFile *self, double scalar);
-
-/* storage */
-TH_API size_t THFile_readByte(THFile *self, THByteStorage *storage);
-TH_API size_t THFile_readChar(THFile *self, THCharStorage *storage);
-TH_API size_t THFile_readShort(THFile *self, THShortStorage *storage);
-TH_API size_t THFile_readInt(THFile *self, THIntStorage *storage);
-TH_API size_t THFile_readLong(THFile *self, THLongStorage *storage);
-TH_API size_t THFile_readFloat(THFile *self, THFloatStorage *storage);
-TH_API size_t THFile_readDouble(THFile *self, THDoubleStorage *storage);
-
-TH_API size_t THFile_writeByte(THFile *self, THByteStorage *storage);
-TH_API size_t THFile_writeChar(THFile *self, THCharStorage *storage);
-TH_API size_t THFile_writeShort(THFile *self, THShortStorage *storage);
-TH_API size_t THFile_writeInt(THFile *self, THIntStorage *storage);
-TH_API size_t THFile_writeLong(THFile *self, THLongStorage *storage);
-TH_API size_t THFile_writeFloat(THFile *self, THFloatStorage *storage);
-TH_API size_t THFile_writeDouble(THFile *self, THDoubleStorage *storage);
-
-/* raw */
-TH_API size_t THFile_readByteRaw(THFile *self, unsigned char *data, size_t n);
-TH_API size_t THFile_readCharRaw(THFile *self, char *data, size_t n);
-TH_API size_t THFile_readShortRaw(THFile *self, short *data, size_t n);
-TH_API size_t THFile_readIntRaw(THFile *self, int *data, size_t n);
-TH_API size_t THFile_readLongRaw(THFile *self, long *data, size_t n);
-TH_API size_t THFile_readFloatRaw(THFile *self, float *data, size_t n);
-TH_API size_t THFile_readDoubleRaw(THFile *self, double *data, size_t n);
-TH_API size_t THFile_readStringRaw(THFile *self, const char *format, char **str_); /* you must deallocate str_ */
-
-TH_API size_t THFile_writeByteRaw(THFile *self, unsigned char *data, size_t n);
-TH_API size_t THFile_writeCharRaw(THFile *self, char *data, size_t n);
-TH_API size_t THFile_writeShortRaw(THFile *self, short *data, size_t n);
-TH_API size_t THFile_writeIntRaw(THFile *self, int *data, size_t n);
-TH_API size_t THFile_writeLongRaw(THFile *self, long *data, size_t n);
-TH_API size_t THFile_writeFloatRaw(THFile *self, float *data, size_t n);
-TH_API size_t THFile_writeDoubleRaw(THFile *self, double *data, size_t n);
-TH_API size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size);
-
-TH_API THHalf THFile_readHalfScalar(THFile *self);
-TH_API void THFile_writeHalfScalar(THFile *self, THHalf scalar);
-TH_API size_t THFile_readHalf(THFile *self, THHalfStorage *storage);
-TH_API size_t THFile_writeHalf(THFile *self, THHalfStorage *storage);
-TH_API size_t THFile_readHalfRaw(THFile *self, THHalf* data, size_t size);
-TH_API size_t THFile_writeHalfRaw(THFile *self, THHalf* data, size_t size);
-
-TH_API void THFile_synchronize(THFile *self);
-TH_API void THFile_seek(THFile *self, size_t position);
-TH_API void THFile_seekEnd(THFile *self);
-TH_API size_t THFile_position(THFile *self);
-TH_API void THFile_close(THFile *self);
-TH_API void THFile_free(THFile *self);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THFilePrivate.h b/contrib/lua-torch/torch7/lib/TH/THFilePrivate.h
deleted file mode 100644
index 55169c3bc..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THFilePrivate.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "THGeneral.h"
-
-#include "THHalf.h"
-
-
-struct THFile__
-{
- struct THFileVTable *vtable;
-
- int isQuiet;
- int isReadable;
- int isWritable;
- int isBinary;
- int isAutoSpacing;
- int hasError;
-};
-
-/* virtual table definition */
-
-struct THFileVTable
-{
- int (*isOpened)(THFile *self);
-
- size_t (*readByte)(THFile *self, unsigned char *data, size_t n);
- size_t (*readChar)(THFile *self, char *data, size_t n);
- size_t (*readShort)(THFile *self, short *data, size_t n);
- size_t (*readInt)(THFile *self, int *data, size_t n);
- size_t (*readLong)(THFile *self, long *data, size_t n);
- size_t (*readFloat)(THFile *self, float *data, size_t n);
- size_t (*readDouble)(THFile *self, double *data, size_t n);
- size_t (*readHalf)(THFile *self, THHalf *data, size_t n);
- size_t (*readString)(THFile *self, const char *format, char **str_);
-
- size_t (*writeByte)(THFile *self, unsigned char *data, size_t n);
- size_t (*writeChar)(THFile *self, char *data, size_t n);
- size_t (*writeShort)(THFile *self, short *data, size_t n);
- size_t (*writeInt)(THFile *self, int *data, size_t n);
- size_t (*writeLong)(THFile *self, long *data, size_t n);
- size_t (*writeFloat)(THFile *self, float *data, size_t n);
- size_t (*writeDouble)(THFile *self, double *data, size_t n);
- size_t (*writeHalf)(THFile *self, THHalf *data, size_t n);
- size_t (*writeString)(THFile *self, const char *str, size_t size);
-
- void (*synchronize)(THFile *self);
- void (*seek)(THFile *self, size_t position);
- void (*seekEnd)(THFile *self);
- size_t (*position)(THFile *self);
- void (*close)(THFile *self);
- void (*free)(THFile *self);
-};
diff --git a/contrib/lua-torch/torch7/lib/TH/THGeneral.c b/contrib/lua-torch/torch7/lib/TH/THGeneral.c
deleted file mode 100644
index f093c422f..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGeneral.c
+++ /dev/null
@@ -1,406 +0,0 @@
-#include "THGeneral.h"
-#include "THAtomic.h"
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#ifndef TH_HAVE_THREAD
-#define __thread
-#elif _MSC_VER
-#define __thread __declspec( thread )
-#endif
-
-#if defined(__APPLE__)
-#include <malloc/malloc.h>
-#endif
-
-#if defined(__linux__)
-#include <malloc.h>
-#endif
-
-#if defined(__FreeBSD__)
-#include <malloc_np.h>
-#endif
-
-/* Torch Error Handling */
-static void defaultErrorHandlerFunction(const char *msg, void *data)
-{
- printf("$ Error: %s\n", msg);
- abort();
-}
-
-static THErrorHandlerFunction defaultErrorHandler = defaultErrorHandlerFunction;
-static void *defaultErrorHandlerData;
-static __thread THErrorHandlerFunction threadErrorHandler = NULL;
-static __thread void *threadErrorHandlerData;
-
-void _THError(const char *file, const int line, const char *fmt, ...)
-{
- char msg[2048];
- va_list args;
-
- /* vasprintf not standard */
- /* vsnprintf: how to handle if does not exists? */
- va_start(args, fmt);
- int n = vsnprintf(msg, 2048, fmt, args);
- va_end(args);
-
- if(n < 2048) {
- snprintf(msg + n, 2048 - n, " at %s:%d", file, line);
- }
-
- if (threadErrorHandler)
- (*threadErrorHandler)(msg, threadErrorHandlerData);
- else
- (*defaultErrorHandler)(msg, defaultErrorHandlerData);
-}
-
-void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...) {
- char msg[1024];
- va_list args;
- va_start(args, fmt);
- vsnprintf(msg, 1024, fmt, args);
- va_end(args);
- _THError(file, line, "Assertion `%s' failed. %s", exp, msg);
-}
-
-void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data)
-{
- threadErrorHandler = new_handler;
- threadErrorHandlerData = data;
-}
-
-void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data)
-{
- if (new_handler)
- defaultErrorHandler = new_handler;
- else
- defaultErrorHandler = defaultErrorHandlerFunction;
- defaultErrorHandlerData = data;
-}
-
-/* Torch Arg Checking Handling */
-static void defaultArgErrorHandlerFunction(int argNumber, const char *msg, void *data)
-{
- if(msg)
- printf("$ Invalid argument %d: %s\n", argNumber, msg);
- else
- printf("$ Invalid argument %d\n", argNumber);
- exit(-1);
-}
-
-static THArgErrorHandlerFunction defaultArgErrorHandler = defaultArgErrorHandlerFunction;
-static void *defaultArgErrorHandlerData;
-static __thread THArgErrorHandlerFunction threadArgErrorHandler = NULL;
-static __thread void *threadArgErrorHandlerData;
-
-void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...)
-{
- if(!condition) {
- char msg[2048];
- va_list args;
-
- /* vasprintf not standard */
- /* vsnprintf: how to handle if does not exists? */
- va_start(args, fmt);
- int n = vsnprintf(msg, 2048, fmt, args);
- va_end(args);
-
- if(n < 2048) {
- snprintf(msg + n, 2048 - n, " at %s:%d", file, line);
- }
-
- if (threadArgErrorHandler)
- (*threadArgErrorHandler)(argNumber, msg, threadArgErrorHandlerData);
- else
- (*defaultArgErrorHandler)(argNumber, msg, defaultArgErrorHandlerData);
- }
-}
-
-void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)
-{
- threadArgErrorHandler = new_handler;
- threadArgErrorHandlerData = data;
-}
-
-void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)
-{
- if (new_handler)
- defaultArgErrorHandler = new_handler;
- else
- defaultArgErrorHandler = defaultArgErrorHandlerFunction;
- defaultArgErrorHandlerData = data;
-}
-
-static __thread void (*torchGCFunction)(void *data) = NULL;
-static __thread void *torchGCData;
-static ptrdiff_t heapSize = 0;
-static __thread ptrdiff_t heapDelta = 0;
-static const ptrdiff_t heapMaxDelta = (ptrdiff_t)1e6; // limit to +/- 1MB before updating heapSize
-static const ptrdiff_t heapMinDelta = (ptrdiff_t)-1e6;
-static __thread ptrdiff_t heapSoftmax = (ptrdiff_t)3e8; // 300MB, adjusted upward dynamically
-static const double heapSoftmaxGrowthThresh = 0.8; // grow softmax if >80% max after GC
-static const double heapSoftmaxGrowthFactor = 1.4; // grow softmax by 40%
-
-/* Optional hook for integrating with a garbage-collected frontend.
- *
- * If torch is running with a garbage-collected frontend (e.g. Lua),
- * the GC isn't aware of TH-allocated memory so may not know when it
- * needs to run. These hooks trigger the GC to run in two cases:
- *
- * (1) When a memory allocation (malloc, realloc, ...) fails
- * (2) When the total TH-allocated memory hits a dynamically-adjusted
- * soft maximum.
- */
-void THSetGCHandler( void (*torchGCFunction_)(void *data), void *data )
-{
- torchGCFunction = torchGCFunction_;
- torchGCData = data;
-}
-
-/* it is guaranteed the allocated size is not bigger than PTRDIFF_MAX */
-static ptrdiff_t getAllocSize(void *ptr) {
-#if defined(__unix) && defined(HAVE_MALLOC_USABLE_SIZE)
- return malloc_usable_size(ptr);
-#elif defined(__APPLE__)
- return malloc_size(ptr);
-#elif defined(_WIN32)
- if(ptr) { return _msize(ptr); } else { return 0; }
-#else
- return 0;
-#endif
-}
-
-static ptrdiff_t applyHeapDelta() {
- ptrdiff_t oldHeapSize = THAtomicAddPtrdiff(&heapSize, heapDelta);
-#ifdef DEBUG
- if (heapDelta > 0 && oldHeapSize > PTRDIFF_MAX - heapDelta)
- THError("applyHeapDelta: heapSize(%td) + increased(%td) > PTRDIFF_MAX, heapSize overflow!", oldHeapSize, heapDelta);
- if (heapDelta < 0 && oldHeapSize < PTRDIFF_MIN - heapDelta)
- THError("applyHeapDelta: heapSize(%td) + decreased(%td) < PTRDIFF_MIN, heapSize underflow!", oldHeapSize, heapDelta);
-#endif
- ptrdiff_t newHeapSize = oldHeapSize + heapDelta;
- heapDelta = 0;
- return newHeapSize;
-}
-
-/* (1) if the torch-allocated heap size exceeds the soft max, run GC
- * (2) if post-GC heap size exceeds 80% of the soft max, increase the
- * soft max by 40%
- */
-static void maybeTriggerGC(ptrdiff_t curHeapSize) {
- if (torchGCFunction && curHeapSize > heapSoftmax) {
- torchGCFunction(torchGCData);
-
- // ensure heapSize is accurate before updating heapSoftmax
- ptrdiff_t newHeapSize = applyHeapDelta();
-
- if (newHeapSize > heapSoftmax * heapSoftmaxGrowthThresh) {
- heapSoftmax = (ptrdiff_t)(heapSoftmax * heapSoftmaxGrowthFactor);
- }
- }
-}
-
-// hooks into the TH heap tracking
-void THHeapUpdate(ptrdiff_t size) {
-#ifdef DEBUG
- if (size > 0 && heapDelta > PTRDIFF_MAX - size)
- THError("THHeapUpdate: heapDelta(%td) + increased(%td) > PTRDIFF_MAX, heapDelta overflow!", heapDelta, size);
- if (size < 0 && heapDelta < PTRDIFF_MIN - size)
- THError("THHeapUpdate: heapDelta(%td) + decreased(%td) < PTRDIFF_MIN, heapDelta underflow!", heapDelta, size);
-#endif
-
- heapDelta += size;
-
- // batch updates to global heapSize to minimize thread contention
- if (heapDelta < heapMaxDelta && heapDelta > heapMinDelta) {
- return;
- }
-
- ptrdiff_t newHeapSize = applyHeapDelta();
-
- if (size > 0) {
- maybeTriggerGC(newHeapSize);
- }
-}
-
-static void* THAllocInternal(ptrdiff_t size)
-{
- void *ptr;
-
- if (size > 5120)
- {
-#if (defined(__unix) || defined(__APPLE__)) && (!defined(DISABLE_POSIX_MEMALIGN))
- if (posix_memalign(&ptr, 64, size) != 0)
- ptr = NULL;
-/*
-#elif defined(_WIN32)
- ptr = _aligned_malloc(size, 64);
-*/
-#else
- ptr = malloc(size);
-#endif
- }
- else
- {
- ptr = malloc(size);
- }
-
- THHeapUpdate(getAllocSize(ptr));
- return ptr;
-}
-
-void* THAlloc(ptrdiff_t size)
-{
- void *ptr;
-
- if(size < 0)
- THError("$ Torch: invalid memory size -- maybe an overflow?");
-
- if(size == 0)
- return NULL;
-
- ptr = THAllocInternal(size);
-
- if(!ptr && torchGCFunction) {
- torchGCFunction(torchGCData);
- ptr = THAllocInternal(size);
- }
-
- if(!ptr)
- THError("$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", size/1073741824);
-
- return ptr;
-}
-
-void* THRealloc(void *ptr, ptrdiff_t size)
-{
- if(!ptr)
- return(THAlloc(size));
-
- if(size == 0)
- {
- THFree(ptr);
- return NULL;
- }
-
- if(size < 0)
- THError("$ Torch: invalid memory size -- maybe an overflow?");
-
- ptrdiff_t oldSize = -getAllocSize(ptr);
- void *newptr = realloc(ptr, size);
-
- if(!newptr && torchGCFunction) {
- torchGCFunction(torchGCData);
- newptr = realloc(ptr, size);
- }
-
- if(!newptr)
- THError("$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824);
-
- // update heapSize only after successfully reallocated
- THHeapUpdate(oldSize + getAllocSize(newptr));
-
- return newptr;
-}
-
-void THFree(void *ptr)
-{
- THHeapUpdate(-getAllocSize(ptr));
- free(ptr);
-}
-
-double THLog1p(const double x)
-{
-#if (defined(_MSC_VER) || defined(__MINGW32__))
- volatile double y = 1 + x;
- return log(y) - ((y-1)-x)/y ; /* cancels errors with IEEE arithmetic */
-#else
- return log1p(x);
-#endif
-}
-
-void THSetNumThreads(int num_threads)
-{
-#ifdef _OPENMP
- omp_set_num_threads(num_threads);
-#endif
-#ifdef TH_BLAS_OPEN
- extern void openblas_set_num_threads(int);
- openblas_set_num_threads(num_threads);
-#endif
-#ifdef TH_BLAS_MKL
- extern void mkl_set_num_threads(int);
- mkl_set_num_threads(num_threads);
-
-#endif
-}
-
-int THGetNumThreads(void)
-{
- int nthreads = 1;
-#ifdef _OPENMP
- nthreads = omp_get_max_threads();
-#endif
-#ifdef TH_BLAS_OPEN
- int bl_threads = 1;
- extern int openblas_get_num_threads(void);
- bl_threads = openblas_get_num_threads();
- nthreads = nthreads > bl_threads ? bl_threads : nthreads;
-#endif
-#ifdef TH_BLAS_MKL
- int bl_threads = 1;
- extern int mkl_get_max_threads(void);
- bl_threads = mkl_get_max_threads();
- nthreads = nthreads > bl_threads ? bl_threads : nthreads;
-#endif
- return nthreads;
-}
-
-int THGetNumCores(void)
-{
-#ifdef _OPENMP
- return omp_get_num_procs();
-#else
- return 1;
-#endif
-}
-
-#ifdef TH_BLAS_MKL
-extern int mkl_get_max_threads(void);
-#endif
-
-TH_API void THInferNumThreads(void)
-{
-#if defined(_OPENMP) && defined(TH_BLAS_MKL)
- // If we are using MKL an OpenMP make sure the number of threads match.
- // Otherwise, MKL and our OpenMP-enabled functions will keep changing the
- // size of the OpenMP thread pool, resulting in worse performance (and memory
- // leaks in GCC 5.4)
- omp_set_num_threads(mkl_get_max_threads());
-#endif
-}
-
-TH_API THDescBuff _THSizeDesc(const long *size, const long ndim) {
- const int L = TH_DESC_BUFF_LEN;
- THDescBuff buf;
- char *str = buf.str;
- int n = 0;
- n += snprintf(str, L-n, "[");
- int i;
- for(i = 0; i < ndim; i++) {
- if(n >= L) break;
- n += snprintf(str+n, L-n, "%ld", size[i]);
- if(i < ndim-1) {
- n += snprintf(str+n, L-n, " x ");
- }
- }
- if(n < L - 2) {
- snprintf(str+n, L-n, "]");
- } else {
- snprintf(str+L-5, 5, "...]");
- }
- return buf;
-}
-
diff --git a/contrib/lua-torch/torch7/lib/TH/THGeneral.h.in b/contrib/lua-torch/torch7/lib/TH/THGeneral.h.in
deleted file mode 100644
index 88a3934c8..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGeneral.h.in
+++ /dev/null
@@ -1,130 +0,0 @@
-#ifndef TH_GENERAL_INC
-#define TH_GENERAL_INC
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <math.h>
-#include <limits.h>
-#include <float.h>
-#include <time.h>
-#include <string.h>
-#include <stddef.h>
-
-#cmakedefine USE_BLAS
-#cmakedefine USE_LAPACK
-#cmakedefine BLAS_F2C
-
-#ifdef __cplusplus
-# define TH_EXTERNC extern "C"
-#else
-# define TH_EXTERNC extern
-#endif
-
-#ifdef _WIN32
-# ifdef TH_EXPORTS
-# define TH_API TH_EXTERNC __declspec(dllexport)
-# else
-# define TH_API TH_EXTERNC __declspec(dllimport)
-# endif
-#else
-# define TH_API TH_EXTERNC
-#endif
-
-#ifndef M_PI
-# define M_PI 3.14159265358979323846
-#endif
-
-#ifndef TH_INDEX_BASE
-#define TH_INDEX_BASE 1
-#endif
-
-typedef void (*THErrorHandlerFunction)(const char *msg, void *data);
-typedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data);
-
-#define TH_DESC_BUFF_LEN 64
-typedef struct {
- char str[TH_DESC_BUFF_LEN];
-} THDescBuff;
-
-
-TH_API double THLog1p(const double x);
-TH_API THDescBuff _THSizeDesc(const long *size, const long ndim);
-TH_API void _THError(const char *file, const int line, const char *fmt, ...);
-TH_API void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...);
-TH_API void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data);
-TH_API void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data);
-TH_API void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...);
-TH_API void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);
-TH_API void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);
-TH_API void* THAlloc(ptrdiff_t size);
-TH_API void* THRealloc(void *ptr, ptrdiff_t size);
-TH_API void THFree(void *ptr);
-TH_API void THSetGCHandler( void (*torchGCHandlerFunction)(void *data), void *data );
-// this hook should only be called by custom allocator functions
-TH_API void THHeapUpdate(ptrdiff_t size);
-TH_API void THSetNumThreads(int num_threads);
-TH_API int THGetNumThreads(void);
-TH_API int THGetNumCores(void);
-TH_API void THInferNumThreads(void);
-
-#define THError(...) _THError(__FILE__, __LINE__, __VA_ARGS__)
-
-#define THCleanup(...) __VA_ARGS__
-
-#define THArgCheck(...) \
-do { \
- _THArgCheck(__FILE__, __LINE__, __VA_ARGS__); \
-} while(0)
-
-#define THArgCheckWithCleanup(condition, cleanup, ...) \
-do if (!(condition)) { \
- cleanup \
- _THArgCheck(__FILE__, __LINE__, 0, __VA_ARGS__); \
-} while(0)
-
-#define THAssert(exp) \
-do { \
- if (!(exp)) { \
- _THAssertionFailed(__FILE__, __LINE__, #exp, ""); \
- } \
-} while(0)
-
-#define THAssertMsg(exp, ...) \
-do { \
- if (!(exp)) { \
- _THAssertionFailed(__FILE__, __LINE__, #exp, __VA_ARGS__); \
- } \
-} while(0)
-
-#define TH_CONCAT_STRING_2(x,y) TH_CONCAT_STRING_2_EXPAND(x,y)
-#define TH_CONCAT_STRING_2_EXPAND(x,y) #x #y
-
-#define TH_CONCAT_STRING_3(x,y,z) TH_CONCAT_STRING_3_EXPAND(x,y,z)
-#define TH_CONCAT_STRING_3_EXPAND(x,y,z) #x #y #z
-
-#define TH_CONCAT_STRING_4(x,y,z,w) TH_CONCAT_STRING_4_EXPAND(x,y,z,w)
-#define TH_CONCAT_STRING_4_EXPAND(x,y,z,w) #x #y #z #w
-
-#define TH_CONCAT_2(x,y) TH_CONCAT_2_EXPAND(x,y)
-#define TH_CONCAT_2_EXPAND(x,y) x ## y
-
-#define TH_CONCAT_3(x,y,z) TH_CONCAT_3_EXPAND(x,y,z)
-#define TH_CONCAT_3_EXPAND(x,y,z) x ## y ## z
-
-#define TH_CONCAT_4_EXPAND(x,y,z,w) x ## y ## z ## w
-#define TH_CONCAT_4(x,y,z,w) TH_CONCAT_4_EXPAND(x,y,z,w)
-
-#define THMin(X, Y) ((X) < (Y) ? (X) : (Y))
-#define THMax(X, Y) ((X) > (Y) ? (X) : (Y))
-
-#if (defined(_MSC_VER) || defined(__MINGW32__))
-# define log1p(x) THLog1p(x)
-#define snprintf _snprintf
-#define popen _popen
-#define pclose _pclose
-#include <BaseTsd.h>
-typedef SSIZE_T ssize_t;
-#endif
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h b/contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h
deleted file mode 100644
index 5b9508df7..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateAllTypes.h"
-#endif
-
-#ifndef THGenerateManyTypes
-#define THAllLocalGenerateManyTypes
-#define THGenerateManyTypes
-#endif
-
-#include "THGenerateFloatTypes.h"
-#include "THGenerateIntTypes.h"
-
-#ifdef THAllLocalGenerateManyTypes
-#undef THAllLocalGenerateManyTypes
-#undef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h
deleted file mode 100644
index 71ce7c405..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateByteType.h"
-#endif
-
-#define real unsigned char
-#define accreal long
-#define Real Byte
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define THInf UCHAR_MAX
-#define TH_REAL_IS_BYTE
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef real
-#undef accreal
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_BYTE
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h
deleted file mode 100644
index 158dd0e80..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateCharType.h"
-#endif
-
-#define real char
-#define accreal long
-#define Real Char
-#define THInf CHAR_MAX
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define TH_REAL_IS_CHAR
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef real
-#undef accreal
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_CHAR
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h
deleted file mode 100644
index fffee606d..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateDoubleType.h"
-#endif
-
-#define real double
-#define accreal double
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define Real Double
-#define THInf DBL_MAX
-#define TH_REAL_IS_DOUBLE
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef accreal
-#undef real
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_DOUBLE
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h
deleted file mode 100644
index a31b50c55..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateFloatType.h"
-#endif
-
-#define real float
-#define accreal double
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define Real Float
-#define THInf FLT_MAX
-#define TH_REAL_IS_FLOAT
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef accreal
-#undef real
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_FLOAT
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h b/contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h
deleted file mode 100644
index be5ea8403..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h"
-#endif
-
-#ifndef THGenerateManyTypes
-#define THFloatLocalGenerateManyTypes
-#define THGenerateManyTypes
-#endif
-
-#include "THGenerateFloatType.h"
-#include "THGenerateDoubleType.h"
-
-#ifdef THFloatLocalGenerateManyTypes
-#undef THFloatLocalGenerateManyTypes
-#undef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h
deleted file mode 100644
index 47ff1e8d7..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateHalfType.h"
-#endif
-
-#include "THHalf.h"
-#define real THHalf
-#define accreal float
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) TH_half2float(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) TH_float2half(_val)
-#define Real Half
-#define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF)
-#define TH_REAL_IS_HALF
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef real
-#undef accreal
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_HALF
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h
deleted file mode 100644
index 1562b9e98..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateIntType.h"
-#endif
-
-#define real int
-#define accreal long
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define Real Int
-#define THInf INT_MAX
-#define TH_REAL_IS_INT
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef real
-#undef accreal
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_INT
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h b/contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h
deleted file mode 100644
index 9931fb1f5..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateIntTypes.h"
-#endif
-
-#ifndef THGenerateManyTypes
-#define THIntLocalGenerateManyTypes
-#define THGenerateManyTypes
-#endif
-
-#include "THGenerateByteType.h"
-#include "THGenerateCharType.h"
-#include "THGenerateShortType.h"
-#include "THGenerateIntType.h"
-#include "THGenerateLongType.h"
-
-#ifdef THIntLocalGenerateManyTypes
-#undef THIntLocalGenerateManyTypes
-#undef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h
deleted file mode 100644
index 75f90e1a6..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateLongType.h"
-#endif
-
-#define real long
-#define accreal long
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define Real Long
-#define THInf LONG_MAX
-#define TH_REAL_IS_LONG
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef real
-#undef accreal
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_LONG
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h
deleted file mode 100644
index 047e51a8d..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#error "You must define TH_GENERIC_FILE before including THGenerateShortType.h"
-#endif
-
-#define real short
-#define accreal long
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
-#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)
-#define Real Short
-#define THInf SHRT_MAX
-#define TH_REAL_IS_SHORT
-#line 1 TH_GENERIC_FILE
-#include TH_GENERIC_FILE
-#undef real
-#undef accreal
-#undef Real
-#undef THInf
-#undef TH_REAL_IS_SHORT
-#undef TH_CONVERT_REAL_TO_ACCREAL
-#undef TH_CONVERT_ACCREAL_TO_REAL
-
-#ifndef THGenerateManyTypes
-#undef TH_GENERIC_FILE
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THHalf.c b/contrib/lua-torch/torch7/lib/TH/THHalf.c
deleted file mode 100644
index d7468ac3d..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THHalf.c
+++ /dev/null
@@ -1,100 +0,0 @@
-#include "THHalf.h"
-
-/* Copyright 1993-2014 NVIDIA Corporation. All rights reserved. */
-
-THHalf TH_float2half(float f)
-{
- THHalf h;
- TH_float2halfbits(&f, &h.x);
- return h;
-}
-
-TH_API float TH_half2float(THHalf h)
-{
- float f;
- TH_halfbits2float(&h.x, &f);
- return f;
-}
-
-// Host functions for converting between FP32 and FP16 formats
-
-void TH_halfbits2float(unsigned short* src, float* res)
-{
- unsigned h = *src;
- unsigned sign = ((h >> 15) & 1);
- unsigned exponent = ((h >> 10) & 0x1f);
- unsigned mantissa = ((h & 0x3ff) << 13);
-
- if (exponent == 0x1f) { /* NaN or Inf */
- mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
- exponent = 0xff;
- } else if (!exponent) { /* Denorm or Zero */
- if (mantissa) {
- unsigned int msb;
- exponent = 0x71;
- do {
- msb = (mantissa & 0x400000);
- mantissa <<= 1; /* normalize */
- --exponent;
- } while (!msb);
- mantissa &= 0x7fffff; /* 1.mantissa is implicit */
- }
- } else {
- exponent += 0x70;
- }
-
- *(unsigned*)res = ((sign << 31) | (exponent << 23) | mantissa);
-}
-
-void TH_float2halfbits(float* src, unsigned short* dest)
-{
- unsigned x = *(unsigned*)src;
- unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
- unsigned sign, exponent, mantissa;
-
- // Get rid of +NaN/-NaN case first.
- if (u > 0x7f800000) {
- *dest = 0x7fffU;
- return ;
- }
-
- sign = ((x >> 16) & 0x8000);
-
- // Get rid of +Inf/-Inf, +0/-0.
- if (u > 0x477fefff) {
- *dest = sign | 0x7c00U;
- return;
- }
- if (u < 0x33000001) {
- *dest = (sign | 0x0000);
- return;
- }
-
- exponent = ((u >> 23) & 0xff);
- mantissa = (u & 0x7fffff);
-
- if (exponent > 0x70) {
- shift = 13;
- exponent -= 0x70;
- } else {
- shift = 0x7e - exponent;
- exponent = 0;
- mantissa |= 0x800000;
- }
- lsb = (1 << shift);
- lsb_s1 = (lsb >> 1);
- lsb_m1 = (lsb - 1);
-
- // Round to nearest even.
- remainder = (mantissa & lsb_m1);
- mantissa >>= shift;
- if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
- ++mantissa;
- if (!(mantissa & 0x3ff)) {
- ++exponent;
- mantissa = 0;
- }
- }
-
- *dest = (sign | (exponent << 10) | mantissa);
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THHalf.h b/contrib/lua-torch/torch7/lib/TH/THHalf.h
deleted file mode 100644
index 0f9807b50..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THHalf.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef TH_HALF_H
-#define TH_HALF_H
-
-#include "THGeneral.h"
-#include <stdint.h>
-
-/* Neither built-in nor included from Cutorch, use our definition lifted from CUDA */
-#if defined(__GNUC__)
-#define __thalign__(n) __attribute__((aligned(n)))
-#elif defined(_WIN32)
-#define __thalign__(n) __declspec(align(n))
-#else
-#define __thalign__(n)
-#endif
-
-typedef struct __thalign__(2){
- unsigned short x;
-} __THHalf;
-
-typedef struct __thalign__(4) {
- unsigned int x;
-} __THHalf2;
-
-typedef __THHalf THHalf;
-typedef __THHalf2 THHalf2;
-
-TH_API void TH_float2halfbits(float*, unsigned short*);
-TH_API void TH_halfbits2float(unsigned short*, float*);
-
-TH_API THHalf TH_float2half(float);
-TH_API float TH_half2float(THHalf);
-
-#ifndef TH_HALF_BITS_TO_LITERAL
-# define TH_HALF_BITS_TO_LITERAL(n) { n }
-#endif
-
-#define TH_HALF_ZERO 0x0U
-#define TH_HALF_INF 0x7C00U
-
-#undef __thalign__
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THLapack.c b/contrib/lua-torch/torch7/lib/TH/THLapack.c
deleted file mode 100644
index bd4dc716b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THLapack.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "THLapack.h"
-
-#include "generic/THLapack.c"
-#include "THGenerateFloatTypes.h"
diff --git a/contrib/lua-torch/torch7/lib/TH/THLapack.h b/contrib/lua-torch/torch7/lib/TH/THLapack.h
deleted file mode 100644
index 614d15f94..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THLapack.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef TH_LAPACK_INC
-#define TH_LAPACK_INC
-
-#include "THGeneral.h"
-
-#define THLapack_(NAME) TH_CONCAT_4(TH,Real,Lapack_,NAME)
-
-#define THLapackCheck(fmt, func, info , ...) \
-if (info < 0) { \
- THError("Lapack Error in %s : Illegal Argument %d", func, -info); \
-} else if(info > 0) { \
- THError(fmt, func, info, ##__VA_ARGS__); \
-} \
-
-#define THLapackCheckWithCleanup(fmt, cleanup, func, info , ...) \
-if (info < 0) { \
- cleanup \
- THError("Lapack Error in %s : Illegal Argument %d", func, -info); \
-} else if(info > 0) { \
- cleanup \
- THError(fmt, func, info, ##__VA_ARGS__); \
-}
-
-#include "generic/THLapack.h"
-#include "THGenerateAllTypes.h"
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THLogAdd.c b/contrib/lua-torch/torch7/lib/TH/THLogAdd.c
deleted file mode 100644
index 4b14f8540..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THLogAdd.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "THLogAdd.h"
-
-#include <float.h>
-
-#ifdef USE_DOUBLE
-#define MINUS_LOG_THRESHOLD -39.14
-#else
-#define MINUS_LOG_THRESHOLD -18.42
-#endif
-
-const double THLog2Pi=1.83787706640934548355;
-const double THLogZero=-DBL_MAX;
-const double THLogOne=0;
-
-double THLogAdd(double log_a, double log_b)
-{
- double minusdif;
-
- if (log_a < log_b)
- {
- double tmp = log_a;
- log_a = log_b;
- log_b = tmp;
- }
-
- minusdif = log_b - log_a;
-#ifdef DEBUG
- if (isnan(minusdif))
- THError("THLogAdd: minusdif (%f) log_b (%f) or log_a (%f) is nan", minusdif, log_b, log_a);
-#endif
- if (minusdif < MINUS_LOG_THRESHOLD)
- return log_a;
- else
- return log_a + log1p(exp(minusdif));
-}
-
-double THLogSub(double log_a, double log_b)
-{
- double minusdif;
-
- if (log_a < log_b)
- THError("LogSub: log_a (%f) should be greater than log_b (%f)", log_a, log_b);
-
- minusdif = log_b - log_a;
-#ifdef DEBUG
- if (isnan(minusdif))
- THError("LogSub: minusdif (%f) log_b (%f) or log_a (%f) is nan", minusdif, log_b, log_a);
-#endif
- if (log_a == log_b)
- return THLogZero;
- else if (minusdif < MINUS_LOG_THRESHOLD)
- return log_a;
- else
- return log_a + log1p(-exp(minusdif));
-}
-
-/* Credits to Leon Bottou */
-double THExpMinusApprox(const double x)
-{
-#define EXACT_EXPONENTIAL 0
-#if EXACT_EXPONENTIAL
- return exp(-x);
-#else
- /* fast approximation of exp(-x) for x positive */
-# define A0 (1.0)
-# define A1 (0.125)
-# define A2 (0.0078125)
-# define A3 (0.00032552083)
-# define A4 (1.0172526e-5)
- if (x < 13.0)
- {
-/* assert(x>=0); */
- double y;
- y = A0+x*(A1+x*(A2+x*(A3+x*A4)));
- y *= y;
- y *= y;
- y *= y;
- y = 1/y;
- return y;
- }
- return 0;
-# undef A0
-# undef A1
-# undef A2
-# undef A3
-# undef A4
-#endif
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THLogAdd.h b/contrib/lua-torch/torch7/lib/TH/THLogAdd.h
deleted file mode 100644
index 9319b8f46..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THLogAdd.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef TH_LOG_ADD_INC
-#define TH_LOG_ADD_INC
-
-#include "THGeneral.h"
-
-TH_API const double THLog2Pi;
-TH_API const double THLogZero;
-TH_API const double THLogOne;
-
-TH_API double THLogAdd(double log_a, double log_b);
-TH_API double THLogSub(double log_a, double log_b);
-TH_API double THExpMinusApprox(const double x);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THMath.h b/contrib/lua-torch/torch7/lib/TH/THMath.h
deleted file mode 100644
index 004e4fe45..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THMath.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _THMATH_H
-#define _THMATH_H
-
-static inline double TH_sigmoid(double value) {
- return 1.0 / (1.0 + exp(-value));
-}
-
-static inline double TH_frac(double x) {
- return x - trunc(x);
-}
-
-static inline double TH_rsqrt(double x) {
- return 1.0 / sqrt(x);
-}
-
-static inline double TH_lerp(double a, double b, double weight) {
- return a + weight * (b-a);
-}
-
-static inline float TH_sigmoidf(float value) {
- return 1.0f / (1.0f + expf(-value));
-}
-
-static inline float TH_fracf(float x) {
- return x - truncf(x);
-}
-
-static inline float TH_rsqrtf(float x) {
- return 1.0f / sqrtf(x);
-}
-
-static inline float TH_lerpf(float a, float b, float weight) {
- return a + weight * (b-a);
-}
-
-#endif // _THMATH_H
diff --git a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.c b/contrib/lua-torch/torch7/lib/TH/THMemoryFile.c
deleted file mode 100644
index ecce6e1b1..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.c
+++ /dev/null
@@ -1,685 +0,0 @@
-#include "THMemoryFile.h"
-#include "THFilePrivate.h"
-#include "stdint.h"
-
-typedef struct THMemoryFile__
-{
- THFile file;
- THCharStorage *storage;
- size_t size;
- size_t position;
- int longSize;
-
-} THMemoryFile;
-
-static int THMemoryFile_isOpened(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- return (mfself->storage != NULL);
-}
-
-static char *THMemoryFile_strnextspace(char *str_, char *c_)
-{
- char c;
-
- while( (c = *str_) )
- {
- if( (c != ' ') && (c != '\n') && (c != ':') && (c != ';') )
- break;
- str_++;
- }
-
- while( (c = *str_) )
- {
- if( (c == ' ') || (c == '\n') || (c == ':') || (c == ';') )
- {
- *c_ = c;
- *str_ = '\0';
- return(str_);
- }
- str_++;
- }
- return NULL;
-}
-
-static void THMemoryFile_grow(THMemoryFile *self, size_t size)
-{
- size_t missingSpace;
-
- if(size <= self->size)
- return;
- else
- {
- if(size < self->storage->size) /* note the "<" and not "<=" */
- {
- self->size = size;
- self->storage->data[self->size] = '\0';
- return;
- }
- }
-
- missingSpace = size-self->storage->size+1; /* +1 for the '\0' */
- THCharStorage_resize(self->storage, (self->storage->size/2 > missingSpace ?
- self->storage->size + (self->storage->size/2)
- : self->storage->size + missingSpace));
-}
-
-static int THMemoryFile_mode(const char *mode, int *isReadable, int *isWritable)
-{
- *isReadable = 0;
- *isWritable = 0;
- if(strlen(mode) == 1)
- {
- if(*mode == 'r')
- {
- *isReadable = 1;
- return 1;
- }
- else if(*mode == 'w')
- {
- *isWritable = 1;
- return 1;
- }
- }
- else if(strlen(mode) == 2)
- {
- if(mode[0] == 'r' && mode[1] == 'w')
- {
- *isReadable = 1;
- *isWritable = 1;
- return 1;
- }
- }
- return 0;
-}
-
-/********************************************************/
-
-#define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM, INSIDE_SPACING) \
- static size_t THMemoryFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \
- { \
- THMemoryFile *mfself = (THMemoryFile*)self; \
- size_t nread = 0; \
- \
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); \
- THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); \
- \
- if (n == 0) \
- return 0; \
- \
- if(mfself->file.isBinary) \
- { \
- size_t nByte = sizeof(TYPE)*n; \
- size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position); \
- nread = nByteRemaining/sizeof(TYPE); \
- memmove(data, mfself->storage->data+mfself->position, nread*sizeof(TYPE)); \
- mfself->position += nread*sizeof(TYPE); \
- } \
- else \
- { \
- size_t i; \
- for(i = 0; i < n; i++) \
- { \
- size_t nByteRead = 0; \
- char spaceChar = 0; \
- char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar); \
- ASCII_READ_ELEM; \
- if(ret == EOF) \
- { \
- while(mfself->storage->data[mfself->position]) \
- mfself->position++; \
- } \
- else \
- mfself->position += nByteRead; \
- if(spacePtr) \
- *spacePtr = spaceChar; \
- } \
- if(mfself->file.isAutoSpacing && (n > 0)) \
- { \
- if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\n') ) \
- mfself->position++; \
- } \
- } \
- \
- if(nread != n) \
- { \
- mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */ \
- if(!mfself->file.isQuiet) \
- THError("read error: read %d blocks instead of %d", nread, n); \
- } \
- \
- return nread; \
- } \
- \
- static size_t THMemoryFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \
- { \
- THMemoryFile *mfself = (THMemoryFile*)self; \
- \
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); \
- THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); \
- \
- if (n == 0) \
- return 0; \
- \
- if(mfself->file.isBinary) \
- { \
- size_t nByte = sizeof(TYPE)*n; \
- THMemoryFile_grow(mfself, mfself->position+nByte); \
- memmove(mfself->storage->data+mfself->position, data, nByte); \
- mfself->position += nByte; \
- if(mfself->position > mfself->size) \
- { \
- mfself->size = mfself->position; \
- mfself->storage->data[mfself->size] = '\0'; \
- } \
- } \
- else \
- { \
- size_t i; \
- for(i = 0; i < n; i++) \
- { \
- ssize_t nByteWritten; \
- while (1) \
- { \
- ASCII_WRITE_ELEM; \
- if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) ) \
- { \
- mfself->position += nByteWritten; \
- break; \
- } \
- THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2); \
- } \
- if(mfself->file.isAutoSpacing) \
- { \
- if(i < n-1) \
- { \
- THMemoryFile_grow(mfself, mfself->position+1); \
- sprintf(mfself->storage->data+mfself->position, " "); \
- mfself->position++; \
- } \
- if(i == n-1) \
- { \
- THMemoryFile_grow(mfself, mfself->position+1); \
- sprintf(mfself->storage->data+mfself->position, "\n"); \
- mfself->position++; \
- } \
- } \
- } \
- if(mfself->position > mfself->size) \
- { \
- mfself->size = mfself->position; \
- mfself->storage->data[mfself->size] = '\0'; \
- } \
- } \
- \
- return n; \
- }
-
-
-void THMemoryFile_longSize(THFile *self, int size)
-{
- THMemoryFile *dfself = (THMemoryFile*)(self);
- THArgCheck(size == 0 || size == 4 || size == 8, 1, "Invalid long size specified");
- dfself->longSize = size;
-}
-
-THCharStorage *THMemoryFile_storage(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
-
- THCharStorage_resize(mfself->storage, mfself->size+1);
-
- return mfself->storage;
-}
-
-static void THMemoryFile_synchronize(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
-}
-
-static void THMemoryFile_seek(THFile *self, size_t position)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
-
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- THArgCheck(position >= 0, 2, "position must be positive");
-
- if(position <= mfself->size)
- mfself->position = position;
- else
- {
- mfself->file.hasError = 1;
- if(!mfself->file.isQuiet)
- THError("unable to seek at position %zu", position);
- }
-}
-
-static void THMemoryFile_seekEnd(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
-
- mfself->position = mfself->size;
-}
-
-static size_t THMemoryFile_position(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- return mfself->position;
-}
-
-static void THMemoryFile_close(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- THCharStorage_free(mfself->storage);
- mfself->storage = NULL;
-}
-
-static void THMemoryFile_free(THFile *self)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
-
- if(mfself->storage)
- THCharStorage_free(mfself->storage);
-
- THFree(mfself);
-}
-
-/* READ_WRITE_METHODS(bool, Bool, */
-/* int value = 0; int ret = sscanf(mfself->storage->data+mfself->position, "%d%n", &value, &nByteRead); data[i] = (value ? 1 : 0), */
-/* int value = (data[i] ? 1 : 0); nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%d", value), */
-/* 1) */
-
-READ_WRITE_METHODS(unsigned char, Byte,
- size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position); \
- if(spacePtr) *spacePtr = spaceChar; \
- nByteRead = ret; \
- nread = ret; \
- i = n-1; \
- memmove(data, mfself->storage->data+mfself->position, nByteRead),
- nByteWritten = (n < mfself->storage->size-mfself->position ? n : -1); \
- i = n-1; \
- if(nByteWritten > -1)
- memmove(mfself->storage->data+mfself->position, data, nByteWritten),
- 0)
-
-/* DEBUG: we should check if %n is count or not as a element (so ret might need to be ret-- on some systems) */
-/* Note that we do a trick for char */
-READ_WRITE_METHODS(char, Char,
- size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position); \
- if(spacePtr) *spacePtr = spaceChar; \
- nByteRead = ret; \
- nread = ret; \
- i = n-1; \
- memmove(data, mfself->storage->data+mfself->position, nByteRead),
- nByteWritten = (n < mfself->storage->size-mfself->position ? n : -1); \
- i = n-1; \
- if(nByteWritten > -1)
- memmove(mfself->storage->data+mfself->position, data, nByteWritten),
- 0)
-
-READ_WRITE_METHODS(short, Short,
- int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%hd%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
- nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%hd", data[i]),
- 1)
-
-READ_WRITE_METHODS(int, Int,
- int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%d%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
- nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%d", data[i]),
- 1)
-
-READ_WRITE_METHODS(float, Float,
- int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%g%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
- nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.9g", data[i]),
- 1)
-
-READ_WRITE_METHODS(THHalf, Half,
- int nByteRead_; float buf; \
- int ret = sscanf(mfself->storage->data+mfself->position, "%g%n", &buf, &nByteRead_); \
- data[i] = TH_float2half(buf); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
- nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.9g", TH_half2float(data[i])),
- 1)
-
-READ_WRITE_METHODS(double, Double,
- int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%lg%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
- nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.17g", data[i]),
- 1)
-
-int THDiskFile_isLittleEndianCPU(void);
-
-static size_t THMemoryFile_readLong(THFile *self, long *data, size_t n)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
- size_t nread = 0L;
-
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file");
-
- if (n == 0)
- return 0;
-
- if(mfself->file.isBinary)
- {
- if(mfself->longSize == 0 || mfself->longSize == sizeof(long))
- {
- size_t nByte = sizeof(long)*n;
- size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position);
- nread = nByteRemaining/sizeof(long);
- memmove(data, mfself->storage->data+mfself->position, nread*sizeof(long));
- mfself->position += nread*sizeof(long);
- } else if(mfself->longSize == 4)
- {
- size_t nByte = 4*n;
- size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position);
- int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);
- nread = nByteRemaining/4;
- size_t i;
- for(i = 0; i < nread; i++)
- data[i] = storage[i];
- mfself->position += nread*4;
- }
- else /* if(mfself->longSize == 8) */
- {
- int big_endian = !THDiskFile_isLittleEndianCPU();
- size_t nByte = 8*n;
- int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);
- size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position);
- nread = nByteRemaining/8;
- size_t i;
- for(i = 0; i < nread; i++)
- data[i] = storage[2*i + big_endian];
- mfself->position += nread*8;
- }
- }
- else
- {
- size_t i;
- for(i = 0; i < n; i++)
- {
- size_t nByteRead = 0;
- char spaceChar = 0;
- char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar);
- int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%ld%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++;
- if(ret == EOF)
- {
- while(mfself->storage->data[mfself->position])
- mfself->position++;
- }
- else
- mfself->position += nByteRead;
- if(spacePtr)
- *spacePtr = spaceChar;
- }
- if(mfself->file.isAutoSpacing && (n > 0))
- {
- if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\n') )
- mfself->position++;
- }
- }
-
- if(nread != n)
- {
- mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */
- if(!mfself->file.isQuiet)
- THError("read error: read %d blocks instead of %d", nread, n);
- }
-
- return nread;
-}
-
-static size_t THMemoryFile_writeLong(THFile *self, long *data, size_t n)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
-
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file");
-
- if (n == 0)
- return 0;
-
- if(mfself->file.isBinary)
- {
- if(mfself->longSize == 0 || mfself->longSize == sizeof(long))
- {
- size_t nByte = sizeof(long)*n;
- THMemoryFile_grow(mfself, mfself->position+nByte);
- memmove(mfself->storage->data+mfself->position, data, nByte);
- mfself->position += nByte;
- } else if(mfself->longSize == 4)
- {
- size_t nByte = 4*n;
- THMemoryFile_grow(mfself, mfself->position+nByte);
- int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);
- size_t i;
- for(i = 0; i < n; i++)
- storage[i] = data[i];
- mfself->position += nByte;
- }
- else /* if(mfself->longSize == 8) */
- {
- int big_endian = !THDiskFile_isLittleEndianCPU();
- size_t nByte = 8*n;
- THMemoryFile_grow(mfself, mfself->position+nByte);
- int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);
- size_t i;
- for(i = 0; i < n; i++)
- {
- storage[2*i + !big_endian] = 0;
- storage[2*i + big_endian] = data[i];
- }
- mfself->position += nByte;
- }
- if(mfself->position > mfself->size)
- {
- mfself->size = mfself->position;
- mfself->storage->data[mfself->size] = '\0';
- }
- }
- else
- {
- size_t i;
- for(i = 0; i < n; i++)
- {
- ssize_t nByteWritten;
- while (1)
- {
- nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%ld", data[i]);
- if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) )
- {
- mfself->position += nByteWritten;
- break;
- }
- THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2);
- }
- if(mfself->file.isAutoSpacing)
- {
- if(i < n-1)
- {
- THMemoryFile_grow(mfself, mfself->position+1);
- sprintf(mfself->storage->data+mfself->position, " ");
- mfself->position++;
- }
- if(i == n-1)
- {
- THMemoryFile_grow(mfself, mfself->position+1);
- sprintf(mfself->storage->data+mfself->position, "\n");
- mfself->position++;
- }
- }
- }
- if(mfself->position > mfself->size)
- {
- mfself->size = mfself->position;
- mfself->storage->data[mfself->size] = '\0';
- }
- }
-
- return n;
-}
-
-static char* THMemoryFile_cloneString(const char *str, ptrdiff_t size)
-{
- char *cstr = THAlloc(size);
- memcpy(cstr, str, size);
- return cstr;
-}
-
-static size_t THMemoryFile_readString(THFile *self, const char *format, char **str_)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
-
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file");
- THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, "format must be '*a' or '*l'");
-
- if(mfself->position == mfself->size) /* eof ? */
- {
- mfself->file.hasError = 1;
- if(!mfself->file.isQuiet)
- THError("read error: read 0 blocks instead of 1");
-
- *str_ = NULL;
- return 0;
- }
-
- if(format[1] == 'a')
- {
- size_t str_size = mfself->size-mfself->position;
-
- *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size);
- mfself->position = mfself->size;
-
- return str_size;
- }
- else
- {
- char *p = mfself->storage->data+mfself->position;
- int eolFound = 0;
- size_t posEol;
- size_t i;
- for(i = 0; i < mfself->size-mfself->position; i++)
- {
- if(p[i] == '\n')
- {
- posEol = i;
- eolFound = 1;
- break;
- }
- }
-
- if(eolFound)
- {
- *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, posEol);
- mfself->position += posEol+1;
- return posEol;
- }
- else /* well, we read all! */
- {
- size_t str_size = mfself->size-mfself->position;
-
- *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size);
- mfself->position = mfself->size;
-
- return str_size;
- }
- }
-
- *str_ = NULL;
- return 0;
-}
-
-static size_t THMemoryFile_writeString(THFile *self, const char *str, size_t size)
-{
- THMemoryFile *mfself = (THMemoryFile*)self;
-
- THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file");
- THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file");
-
- THMemoryFile_grow(mfself, mfself->position+size);
- memmove(mfself->storage->data+mfself->position, str, size);
- mfself->position += size;
- if(mfself->position > mfself->size)
- {
- mfself->size = mfself->position;
- mfself->storage->data[mfself->size] = '\0';
- }
-
- return size;
-}
-
-THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode)
-{
- static struct THFileVTable vtable = {
- THMemoryFile_isOpened,
-
- THMemoryFile_readByte,
- THMemoryFile_readChar,
- THMemoryFile_readShort,
- THMemoryFile_readInt,
- THMemoryFile_readLong,
- THMemoryFile_readFloat,
- THMemoryFile_readDouble,
- THMemoryFile_readHalf,
- THMemoryFile_readString,
-
- THMemoryFile_writeByte,
- THMemoryFile_writeChar,
- THMemoryFile_writeShort,
- THMemoryFile_writeInt,
- THMemoryFile_writeLong,
- THMemoryFile_writeFloat,
- THMemoryFile_writeDouble,
- THMemoryFile_writeHalf,
- THMemoryFile_writeString,
-
- THMemoryFile_synchronize,
- THMemoryFile_seek,
- THMemoryFile_seekEnd,
- THMemoryFile_position,
- THMemoryFile_close,
- THMemoryFile_free
- };
-
- THMemoryFile *mfself;
- int isReadable;
- int isWritable;
-
- if(storage)
- {
- THArgCheck(storage->data[storage->size-1] == '\0', 1, "provided CharStorage must be terminated by 0");
- THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'");
- THCharStorage_retain(storage);
- }
- else
- {
- THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'");
- storage = THCharStorage_newWithSize(1);
- storage->data[0] = '\0';
- }
-
- mfself = THAlloc(sizeof(THMemoryFile));
-
- mfself->storage = storage;
- mfself->size = (storage ? storage->size-1 : 0);
- mfself->position = 0;
- mfself->longSize = 0;
-
- mfself->file.vtable = &vtable;
- mfself->file.isQuiet = 0;
- mfself->file.isReadable = isReadable;
- mfself->file.isWritable = isWritable;
- mfself->file.isBinary = 0;
- mfself->file.isAutoSpacing = 1;
- mfself->file.hasError = 0;
-
- return (THFile*)mfself;
-}
-
-THFile *THMemoryFile_new(const char *mode)
-{
- return THMemoryFile_newWithStorage(NULL, mode);
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.h b/contrib/lua-torch/torch7/lib/TH/THMemoryFile.h
deleted file mode 100644
index b54cdcc2f..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef TH_MEMORY_FILE_INC
-#define TH_MEMORY_FILE_INC
-
-#include "THFile.h"
-#include "THStorage.h"
-
-TH_API THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode);
-TH_API THFile *THMemoryFile_new(const char *mode);
-
-TH_API THCharStorage *THMemoryFile_storage(THFile *self);
-TH_API void THMemoryFile_longSize(THFile *self, int size);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THRandom.c b/contrib/lua-torch/torch7/lib/TH/THRandom.c
deleted file mode 100644
index 86d721e7b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THRandom.c
+++ /dev/null
@@ -1,272 +0,0 @@
-#include "THGeneral.h"
-#include "THRandom.h"
-
-/* Code for the Mersenne Twister random generator.... */
-#define n _MERSENNE_STATE_N
-#define m _MERSENNE_STATE_M
-
-/* Creates (unseeded) new generator*/
-static THGenerator* THGenerator_newUnseeded(void)
-{
- THGenerator *self = THAlloc(sizeof(THGenerator));
- memset(self, 0, sizeof(THGenerator));
- self->left = 1;
- self->seeded = 0;
- self->normal_is_valid = 0;
- return self;
-}
-
-/* Creates new generator and makes sure it is seeded*/
-THGenerator* THGenerator_new(void)
-{
- THGenerator *self = THGenerator_newUnseeded();
- THRandom_seed(self);
- return self;
-}
-
-THGenerator* THGenerator_copy(THGenerator *self, THGenerator *from)
-{
- memcpy(self, from, sizeof(THGenerator));
- return self;
-}
-
-void THGenerator_free(THGenerator *self)
-{
- THFree(self);
-}
-
-int THGenerator_isValid(THGenerator *_generator)
-{
- if ((_generator->seeded == 1) &&
- (_generator->left > 0 && _generator->left <= n) && (_generator->next <= n))
- return 1;
-
- return 0;
-}
-
-#ifndef _WIN32
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-static unsigned long readURandomLong()
-{
- int randDev = open("/dev/urandom", O_RDONLY);
- unsigned long randValue;
- if (randDev < 0) {
- THError("Unable to open /dev/urandom");
- }
- ssize_t readBytes = read(randDev, &randValue, sizeof(randValue));
- if (readBytes < sizeof(randValue)) {
- THError("Unable to read from /dev/urandom");
- }
- close(randDev);
- return randValue;
-}
-#endif // _WIN32
-
-unsigned long THRandom_seed(THGenerator *_generator)
-{
-#ifdef _WIN32
- unsigned long s = (unsigned long)time(0);
-#else
- unsigned long s = readURandomLong();
-#endif
- THRandom_manualSeed(_generator, s);
- return s;
-}
-
-/* The next 4 methods are taken from http:www.math.keio.ac.jpmatumotoemt.html
- Here is the copyright:
- Some minor modifications have been made to adapt to "my" C... */
-
-/*
- A C-program for MT19937, with initialization improved 2002/2/10.
- Coded by Takuji Nishimura and Makoto Matsumoto.
- This is a faster version by taking Shawn Cokus's optimization,
- Matthe Bellew's simplification, Isaku Wada's double version.
-
- Before using, initialize the state by using init_genrand(seed)
- or init_by_array(init_key, key_length).
-
- Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- 3. The names of its contributors may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
- Any feedback is very welcome.
- http://www.math.keio.ac.jp/matumoto/emt.html
- email: matumoto@math.keio.ac.jp
-*/
-
-/* Macros for the Mersenne Twister random generator... */
-/* Period parameters */
-/* #define n 624 */
-/* #define m 397 */
-#define MATRIX_A 0x9908b0dfUL /* constant vector a */
-#define UMASK 0x80000000UL /* most significant w-r bits */
-#define LMASK 0x7fffffffUL /* least significant r bits */
-#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) )
-#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL))
-/*********************************************************** That's it. */
-
-void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_)
-{
- int j;
-
- /* This ensures reseeding resets all of the state (i.e. state for Gaussian numbers) */
- THGenerator *blank = THGenerator_newUnseeded();
- THGenerator_copy(_generator, blank);
- THGenerator_free(blank);
-
- _generator->the_initial_seed = the_seed_;
- _generator->state[0] = _generator->the_initial_seed & 0xffffffffUL;
- for(j = 1; j < n; j++)
- {
- _generator->state[j] = (1812433253UL * (_generator->state[j-1] ^ (_generator->state[j-1] >> 30)) + j);
- /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
- /* In the previous versions, mSBs of the seed affect */
- /* only mSBs of the array state[]. */
- /* 2002/01/09 modified by makoto matsumoto */
- _generator->state[j] &= 0xffffffffUL; /* for >32 bit machines */
- }
- _generator->left = 1;
- _generator->seeded = 1;
-}
-
-unsigned long THRandom_initialSeed(THGenerator *_generator)
-{
- return _generator->the_initial_seed;
-}
-
-void THRandom_nextState(THGenerator *_generator)
-{
- unsigned long *p = _generator->state;
- int j;
-
- _generator->left = n;
- _generator->next = 0;
-
- for(j = n-m+1; --j; p++)
- *p = p[m] ^ TWIST(p[0], p[1]);
-
- for(j = m; --j; p++)
- *p = p[m-n] ^ TWIST(p[0], p[1]);
-
- *p = p[m-n] ^ TWIST(p[0], _generator->state[0]);
-}
-
-unsigned long THRandom_random(THGenerator *_generator)
-{
- unsigned long y;
-
- if (--(_generator->left) == 0)
- THRandom_nextState(_generator);
- y = *(_generator->state + (_generator->next)++);
-
- /* Tempering */
- y ^= (y >> 11);
- y ^= (y << 7) & 0x9d2c5680UL;
- y ^= (y << 15) & 0xefc60000UL;
- y ^= (y >> 18);
-
- return y;
-}
-
-/* generates a random number on [0,1)-double-interval */
-static double __uniform__(THGenerator *_generator)
-{
- /* divided by 2^32 */
- return (double)THRandom_random(_generator) * (1.0/4294967296.0);
-}
-
-/*********************************************************
-
- Thanks *a lot* Takuji Nishimura and Makoto Matsumoto!
-
- Now my own code...
-
-*********************************************************/
-
-double THRandom_uniform(THGenerator *_generator, double a, double b)
-{
- return(__uniform__(_generator) * (b - a) + a);
-}
-
-double THRandom_normal(THGenerator *_generator, double mean, double stdv)
-{
- THArgCheck(stdv > 0, 2, "standard deviation must be strictly positive");
-
- /* This is known as the Box-Muller method */
- if(!_generator->normal_is_valid)
- {
- _generator->normal_x = __uniform__(_generator);
- _generator->normal_y = __uniform__(_generator);
- _generator->normal_rho = sqrt(-2. * log(1.0-_generator->normal_y));
- _generator->normal_is_valid = 1;
- }
- else
- _generator->normal_is_valid = 0;
-
- if(_generator->normal_is_valid)
- return _generator->normal_rho*cos(2.*M_PI*_generator->normal_x)*stdv+mean;
- else
- return _generator->normal_rho*sin(2.*M_PI*_generator->normal_x)*stdv+mean;
-}
-
-double THRandom_exponential(THGenerator *_generator, double lambda)
-{
- return(-1. / lambda * log(1-__uniform__(_generator)));
-}
-
-double THRandom_cauchy(THGenerator *_generator, double median, double sigma)
-{
- return(median + sigma * tan(M_PI*(__uniform__(_generator)-0.5)));
-}
-
-/* Faut etre malade pour utiliser ca.
- M'enfin. */
-double THRandom_logNormal(THGenerator *_generator, double mean, double stdv)
-{
- THArgCheck(stdv > 0, 2, "standard deviation must be strictly positive");
- return(exp(THRandom_normal(_generator, mean, stdv)));
-}
-
-int THRandom_geometric(THGenerator *_generator, double p)
-{
- THArgCheck(p > 0 && p < 1, 1, "must be > 0 and < 1");
- return((int)(log(1-__uniform__(_generator)) / log(p)) + 1);
-}
-
-int THRandom_bernoulli(THGenerator *_generator, double p)
-{
- THArgCheck(p >= 0 && p <= 1, 1, "must be >= 0 and <= 1");
- return(__uniform__(_generator) <= p);
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THRandom.h b/contrib/lua-torch/torch7/lib/TH/THRandom.h
deleted file mode 100644
index 28a14c0d7..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THRandom.h
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef TH_RANDOM_INC
-#define TH_RANDOM_INC
-
-#include "THGeneral.h"
-
-#define _MERSENNE_STATE_N 624
-#define _MERSENNE_STATE_M 397
-/* A THGenerator contains all the state required for a single random number stream */
-typedef struct THGenerator {
- /* The initial seed. */
- unsigned long the_initial_seed;
- int left; /* = 1; */
- int seeded; /* = 0; */
- unsigned long next;
- unsigned long state[_MERSENNE_STATE_N]; /* the array for the state vector */
- /********************************/
-
- /* For normal distribution */
- double normal_x;
- double normal_y;
- double normal_rho;
- int normal_is_valid; /* = 0; */
-} THGenerator;
-
-#define torch_Generator "torch.Generator"
-
-/* Manipulate THGenerator objects */
-TH_API THGenerator * THGenerator_new(void);
-TH_API THGenerator * THGenerator_copy(THGenerator *self, THGenerator *from);
-TH_API void THGenerator_free(THGenerator *gen);
-
-/* Checks if given generator is valid */
-TH_API int THGenerator_isValid(THGenerator *_generator);
-
-/* Initializes the random number generator from /dev/urandom (or on Windows
-platforms with the current time (granularity: seconds)) and returns the seed. */
-TH_API unsigned long THRandom_seed(THGenerator *_generator);
-
-/* Initializes the random number generator with the given long "the_seed_". */
-TH_API void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_);
-
-/* Returns the starting seed used. */
-TH_API unsigned long THRandom_initialSeed(THGenerator *_generator);
-
-/* Generates a uniform 32 bits integer. */
-TH_API unsigned long THRandom_random(THGenerator *_generator);
-
-/* Generates a uniform random number on [0,1[. */
-TH_API double THRandom_uniform(THGenerator *_generator, double a, double b);
-
-/** Generates a random number from a normal distribution.
- (With mean #mean# and standard deviation #stdv >= 0#).
-*/
-TH_API double THRandom_normal(THGenerator *_generator, double mean, double stdv);
-
-/** Generates a random number from an exponential distribution.
- The density is $p(x) = lambda * exp(-lambda * x)$, where
- lambda is a positive number.
-*/
-TH_API double THRandom_exponential(THGenerator *_generator, double lambda);
-
-/** Returns a random number from a Cauchy distribution.
- The Cauchy density is $p(x) = sigma/(pi*(sigma^2 + (x-median)^2))$
-*/
-TH_API double THRandom_cauchy(THGenerator *_generator, double median, double sigma);
-
-/** Generates a random number from a log-normal distribution.
- (#mean > 0# is the mean of the log-normal distribution
- and #stdv# is its standard deviation).
-*/
-TH_API double THRandom_logNormal(THGenerator *_generator, double mean, double stdv);
-
-/** Generates a random number from a geometric distribution.
- It returns an integer #i#, where $p(i) = (1-p) * p^(i-1)$.
- p must satisfy $0 < p < 1$.
-*/
-TH_API int THRandom_geometric(THGenerator *_generator, double p);
-
-/* Returns true with probability $p$ and false with probability $1-p$ (p > 0). */
-TH_API int THRandom_bernoulli(THGenerator *_generator, double p);
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THSize.c b/contrib/lua-torch/torch7/lib/TH/THSize.c
deleted file mode 100644
index ccf1f61dd..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THSize.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "THSize.h"
-
-int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB) {
- int d;
- if (dimsA != dimsB)
- return 0;
- for(d = 0; d < dimsA; ++d)
- {
- if(sizeA[d] != sizeB[d])
- return 0;
- }
- return 1;
-}
-
-ptrdiff_t THSize_nElement(long dims, long *size) {
- if(dims == 0)
- return 0;
- else
- {
- ptrdiff_t nElement = 1;
- int d;
- for(d = 0; d < dims; d++)
- nElement *= size[d];
- return nElement;
- }
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THSize.h b/contrib/lua-torch/torch7/lib/TH/THSize.h
deleted file mode 100644
index 3d39696f6..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THSize.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef TH_SIZE_INC
-#define TH_SIZE_INC
-
-#include "THGeneral.h"
-#include <stddef.h>
-
-// THTensor functions that would work on a THSize if we had such a class in C++,
-// i.e. THTensor functions that depend only on the shape of the tensor, not the type.
-
-TH_API int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB);
-TH_API ptrdiff_t THSize_nElement(long dims, long *size);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THStorage.c b/contrib/lua-torch/torch7/lib/TH/THStorage.c
deleted file mode 100644
index f6b63f4a8..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THStorage.c
+++ /dev/null
@@ -1,153 +0,0 @@
-#include "THAtomic.h"
-#include "THStorage.h"
-
-#include "generic/THStorage.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THStorage.c"
-#include "THGenerateHalfType.h"
-
-#include "generic/THStorageCopy.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THStorageCopy.c"
-#include "THGenerateHalfType.h"
-
-
-THDescBuff THLongStorage_sizeDesc(const THLongStorage *size) {
- return _THSizeDesc(size->data, size->size);
-}
-
-THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement)
-{
- ptrdiff_t total_size = (size->size > 0 ? 1 : 0);
- ptrdiff_t dim_infer = -1;
- ptrdiff_t i;
- for (i = 0; i < size->size; i++) {
- if (size->data[i] == -1) {
- THArgCheck(dim_infer == -1, 1, "only one dimension can be inferred");
- dim_infer = i;
- } else {
- total_size *= size->data[i];
- }
- }
- if (dim_infer != -1) {
- THDescBuff buf = THLongStorage_sizeDesc(size);
- THArgCheck(total_size > 0 && nElement % total_size == 0, 2,
- "size '%s' is invalid for input of with %td elements", buf.str, nElement);
- } else {
- THDescBuff buf = THLongStorage_sizeDesc(size);
- THArgCheck(nElement == total_size, 2,
- "size '%s' is invalid for input of with %td elements", buf.str, nElement);
- }
- THLongStorage* copy = THLongStorage_newWithSize(size->size);
- THLongStorage_copy(copy, size);
- if (dim_infer != -1) {
- copy->data[dim_infer] = nElement / total_size;
- }
- return copy;
-}
-
-int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA, long *sizesB, long dimsB,
- char *error_buffer, int buffer_len) {
- THArgCheck(sizesA != NULL, 1, "sizesA must not be null");
- THArgCheck(sizesB != NULL, 2, "sizesB must not be null");
- THArgCheck(dimsA, 1, "Can't expand empty tensor a");
- THArgCheck(dimsB, 1, "Can't expand empty tensor b");
- ptrdiff_t ndim = dimsA > dimsB ? dimsA : dimsB;
-
- long *expandedSizes = THAlloc(sizeof(long)*ndim);
-
- for (long i = ndim - 1; i >= 0; --i) {
- long offset = ndim - 1 - i;
- long dimA = dimsA - 1 - offset;
- long dimB = dimsB - 1 - offset;
- long sizeA = (dimA >= 0) ? sizesA[dimA] : 1;
- long sizeB = (dimB >= 0) ? sizesB[dimB] : 1;
- if (sizeA == sizeB || sizeA == 1 || sizeB == 1) {
- expandedSizes[i] = THMax(sizeA, sizeB);
- } else {
- THFree(expandedSizes);
- snprintf(error_buffer, buffer_len, "The size of tensor a (%ld) must match the size of tensor b (%ld) at "
- "non-singleton dimension %ld.", sizeA, sizeB, i);
- return -1;
- }
- }
- THLongStorage_resize(output, ndim);
- memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim);
- THFree(expandedSizes);
- return 0;
-}
-
-int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims,
- char *error_buffer, int buffer_len) {
- THArgCheck(n > 0, 2, "n must be greater than 0");
- THArgCheck(sizes != NULL, 1, "sizes must not be null");
- THArgCheck(dims != NULL, 1, "dims must not be null");
-
- ptrdiff_t ndim = 0;
- for (int j = 0; j < n; ++j) {
- THArgCheck(sizes[ j ] != NULL, 1, "size %d must not be null", j);
- THArgCheck(dims[ j ], 1, "Can't expand empty tensor %d", j);
- ndim = dims[ j ] > ndim ? dims[ j ] : ndim;
- }
-
- long *expandedSizes = THAlloc(sizeof(long)*ndim);
-
- for (long i = ndim - 1; i >= 0; --i) {
- expandedSizes[ i ] = 1;
- long offset = ndim - 1 - i;
- for (int j = 0; j < n; ++j) {
- long dim = dims[ j ] - 1 - offset;
- long size = (dim >= 0) ? sizes[ j ][ dim ] : 1;
- if (size == expandedSizes[ i ] || size == 1 || expandedSizes[ i ] == 1) {
- expandedSizes[ i ] = THMax(expandedSizes[ i ], size);
- } else {
- THFree(expandedSizes);
- snprintf(error_buffer, buffer_len, "The size of tensor %i (%ld) must match the expanded size"
- "of tensor (%ld) at non-singleton dimension %ld.", j, size, expandedSizes[ i ], i);
- return -1;
- }
- }
- }
- THLongStorage_resize(output, ndim);
- memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim);
- THFree(expandedSizes);
- return 0;
-}
-
-int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim,
- THLongStorage *sizes, long **expandedSizes, long **expandedStrides,
- char *error_buffer, int buffer_len) {
- ptrdiff_t ndim = THLongStorage_size(sizes);
-
- long *expandedSizesCalc = THAlloc(sizeof(long)*ndim);
- long *expandedStridesCalc = THAlloc(sizeof(long)*ndim);
-
- // create a new geometry for the tensors
- for (long i = ndim - 1; i >= 0; --i) {
- long offset = ndim - 1 - i;
- long dim = tensorDim - 1 - offset;
- long size = (dim >= 0) ? tensorSizes[dim] : 1;
- long stride = (dim >= 0) ?
- tensorStrides[dim] : expandedSizesCalc[i + 1] * expandedStridesCalc[i+1];
- long targetSize = THLongStorage_data(sizes)[i];
- if (size != targetSize) {
- if (size == 1) {
- size = targetSize;
- stride = 0;
- } else {
- THFree(expandedSizesCalc);
- THFree(expandedStridesCalc);
- snprintf(error_buffer, buffer_len, "The expanded size of the tensor (%ld) must match the existing size (%ld) at "
- "non-singleton dimension %ld.", targetSize, size, i);
- return -1;
- }
- }
- expandedSizesCalc[i] = size;
- expandedStridesCalc[i] = stride;
- }
- *expandedSizes = expandedSizesCalc;
- *expandedStrides = expandedStridesCalc;
- return 0;
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/THStorage.h b/contrib/lua-torch/torch7/lib/TH/THStorage.h
deleted file mode 100644
index fb7946bd9..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THStorage.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef TH_STORAGE_INC
-#define TH_STORAGE_INC
-
-#include "THGeneral.h"
-#include "THAllocator.h"
-
-#define THStorage TH_CONCAT_3(TH,Real,Storage)
-#define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)
-
-/* fast access methods */
-#define TH_STORAGE_GET(storage, idx) ((storage)->data[(idx)])
-#define TH_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value))
-
-#include "generic/THStorage.h"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THStorage.h"
-#include "THGenerateHalfType.h"
-
-#include "generic/THStorageCopy.h"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THStorageCopy.h"
-#include "THGenerateHalfType.h"
-
-TH_API THDescBuff THLongStorage_sizeDesc(const THLongStorage *size);
-TH_API THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement);
-
-// Given the sizes of {2,N} tensors, write out the size when the tensors are expanded together.
-TH_API int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA,
- long *sizesB, long dimsB, char *error_buffer, int buffer_len);
-TH_API int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims,
- char *error_buffer, int buffer_len);
-
-TH_API int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim,
- THLongStorage *sizes, long **expandedSizes, long **expandedStrides,
- char *error_buffer, int buffer_len);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THTensor.c b/contrib/lua-torch/torch7/lib/TH/THTensor.c
deleted file mode 100644
index 115e396a1..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THTensor.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include "THAtomic.h"
-#include "THTensor.h"
-#include "THVector.h"
-#include "generic/simd/simd.h"
-
-#include "THBlas.h"
-#include "THLapack.h"
-#include "THRandom.h"
-#include "THTensorDimApply.h"
-#include "THMath.h"
-
-#include "generic/THTensor.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensor.c"
-#include "THGenerateHalfType.h"
-
-#include "generic/THTensorCopy.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensorCopy.c"
-#include "THGenerateHalfType.h"
-
-#include "generic/THTensorRandom.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensorMath.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensorConv.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensorLapack.c"
-#include "THGenerateFloatTypes.h"
diff --git a/contrib/lua-torch/torch7/lib/TH/THTensor.h b/contrib/lua-torch/torch7/lib/TH/THTensor.h
deleted file mode 100644
index d2a1c57e8..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THTensor.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef TH_TENSOR_INC
-#define TH_TENSOR_INC
-
-#include "THStorage.h"
-#include "THTensorApply.h"
-
-#define THTensor TH_CONCAT_3(TH,Real,Tensor)
-#define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME)
-
-/* basics */
-#include "generic/THTensor.h"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensor.h"
-#include "THGenerateHalfType.h"
-
-#include "generic/THTensorCopy.h"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THTensorCopy.h"
-#include "THGenerateHalfType.h"
-
-#include "THTensorMacros.h"
-
-/* random numbers */
-#include "THRandom.h"
-#include "generic/THTensorRandom.h"
-#include "THGenerateAllTypes.h"
-
-/* maths */
-#include "generic/THTensorMath.h"
-#include "THGenerateAllTypes.h"
-
-/* convolutions */
-#include "generic/THTensorConv.h"
-#include "THGenerateAllTypes.h"
-
-/* lapack support */
-#include "generic/THTensorLapack.h"
-#include "THGenerateFloatTypes.h"
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THTensorApply.h b/contrib/lua-torch/torch7/lib/TH/THTensorApply.h
deleted file mode 100644
index 7f48da47e..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THTensorApply.h
+++ /dev/null
@@ -1,238 +0,0 @@
-#ifndef TH_TENSOR_APPLY_INC
-#define TH_TENSOR_APPLY_INC
-
-/*
- * The basic strategy for apply is as follows:
- *
- * 1. Starting with the outermost index, loop until we reach a dimension where the
- * data is no longer contiguous, i.e. the stride at that dimension is not equal to
- * the size of the tensor defined by the outer dimensions. Let's call this outer
- * (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal
- * to the entire Tensor. Let's call the inner tensor B.
- *
- * 2. We loop through the indices in B, starting at its outermost dimension. For
- * example, if B is a 2x2 matrix, then we do:
- *
- * B[0][0]
- * B[0][1]
- * B[1][0]
- * B[1][1]
- *
- * We set the offset into the underlying storage as (storageOffset + stride_B * index_B),
- * i.e. basically we compute the offset into the storage as we would normally for a
- * Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we
- * can simply loop for sizeof(A) iterations and perform the operation, without having to
- * follow the order described by the strides of A.
- *
- * 3. As an optimization, we merge dimensions of A that are contiguous in memory. For
- * example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two
- * dimensions can be merged for the purposes of APPLY, reducing the number of nested
- * loops.
- */
-
-#define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \
- TYPE *TENSOR##_data = NULL; \
- long *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \
- long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \
- int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \
- TENSOR##_n = (TENSOR->nDimension ? 1 : 0); \
- for(TENSOR##_i = 0; TENSOR##_i < TENSOR->nDimension; TENSOR##_i++) \
- TENSOR##_n *= TENSOR->size[TENSOR##_i]; \
-\
- if(TENSOR->nDimension == 0) \
- TH_TENSOR_APPLY_hasFinished = 1; \
- else \
- { \
- TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \
- TENSOR##_size = 1; \
- TENSOR##_stride = 1; \
- for(TENSOR##_i = TENSOR->nDimension-1; TENSOR##_i >= 0; TENSOR##_i--) { \
- if(TENSOR->size[TENSOR##_i] != 1) { \
- if(TENSOR->stride[TENSOR##_i] == TENSOR##_size && TENSOR##_i != DIM) \
- TENSOR##_size *= TENSOR->size[TENSOR##_i]; \
- else{ \
- TENSOR##_contiguous = 0; \
- break; \
- } \
- } \
- } \
- if (!TENSOR##_contiguous) { \
- /* Find the dimension of contiguous sections */ \
- TENSOR##_dim = 1; \
- for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; TENSOR##_i--) \
- { \
- if(TENSOR->stride[TENSOR##_i] != TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \
- TENSOR##_dim++; \
- } \
- /* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \
- TENSOR##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR##_dim)); \
- TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \
- TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \
- TH_TENSOR_dim_index = TENSOR##_dim-1; \
- TENSOR##_dimOffset = (DIM == TENSOR->nDimension-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \
- TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR->nDimension-1]; \
- TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \
- /* TENSOR##_counter tracks where we are in the storage. The offset into the */ \
- /* storage is given by storage_offset + (i * j), where i is the stride */ \
- /* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \
- for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \
- TENSOR##_counter[TENSOR##_i] = 0; \
- } \
- for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \
- if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \
- TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_sizes[TH_TENSOR_dim_index]; \
- if (DIM != TENSOR->nDimension-1 && TENSOR##_i < DIM) \
- TENSOR##_dimOffset--; \
- } else { \
- --TH_TENSOR_dim_index; \
- TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i]; \
- TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR##_i]; \
- } \
- } \
- /* Size of the inner most section */ \
- TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \
- /* Stride of the inner most section */ \
- TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \
- } \
- } \
- TENSOR##_i = 0;
-
-#define __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \
- if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \
- { \
- if(TENSOR##_contiguous) \
- break; \
-\
- if(TENSOR##_dim == 1) \
- break; \
-\
- /* Reset pointer to beginning of loop */ \
- TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \
- for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \
- { \
- TENSOR##_counter[TENSOR##_i]++; \
- /* Jump ahread by the stride of this dimension */ \
- TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \
-\
- if(TENSOR##_counter[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]) \
- { \
- if(TENSOR##_i == 0) \
- { \
- TH_TENSOR_APPLY_hasFinished = 1; \
- break; \
- } \
- else \
- { \
- /* Reset the pointer to the beginning of the chunk defined by this dimension */ \
- TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \
- TENSOR##_counter[TENSOR##_i] = 0; \
- } \
- } \
- else \
- break; \
- } \
- TENSOR##_i = 0; \
- } \
-
-#define TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \
-{ \
- int TH_TENSOR_APPLY_hasFinished = 0; \
- long TH_TENSOR_dim_index = 0; \
- __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
- __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
- __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \
- \
- int elements_equal = 1; \
- if(TENSOR1##_n != TENSOR2##_n) { \
- elements_equal = 0; \
- } \
- else if(TENSOR1##_n != TENSOR3##_n) { \
- elements_equal = 0; \
- } \
- if (elements_equal == 0) { \
- THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \
- THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \
- THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \
- THError("inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same " \
- "number of elements, but got %d, %d and %d elements respectively", \
- #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, \
- TENSOR1##_n, TENSOR2##_n, TENSOR3##_n); \
- } \
- \
- while(!TH_TENSOR_APPLY_hasFinished) \
- { \
- /* Loop through the inner most region of the Tensor */ \
- for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! */ \
- { \
- CODE \
- } \
- __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
- __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
- __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \
- } \
- if(TENSOR1##_counter != NULL) \
- THFree(TENSOR1##_counter); \
- if(TENSOR2##_counter != NULL) \
- THFree(TENSOR2##_counter); \
- if(TENSOR3##_counter != NULL) \
- THFree(TENSOR3##_counter); \
-}
-
-#define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
- TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE)
-
-#define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \
-{ \
- int TH_TENSOR_APPLY_hasFinished = 0; \
- long TH_TENSOR_dim_index = 0; \
- __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \
- __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \
-\
- if(TENSOR1##_n != TENSOR2##_n) { \
- THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \
- THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \
- THError("inconsistent tensor size, expected %s %s and %s %s to have the same " \
- "number of elements, but got %d and %d elements respectively", \
- #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, TENSOR1##_n, TENSOR2##_n); \
- } \
- while(!TH_TENSOR_APPLY_hasFinished) \
- { \
- /* Loop through the inner most region of the Tensor */ \
- for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \
- { \
- CODE \
- } \
- __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \
- __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \
- } \
- if(TENSOR1##_counter != NULL) \
- THFree(TENSOR1##_counter); \
- if(TENSOR2##_counter != NULL) \
- THFree(TENSOR2##_counter); \
-}
-
-#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
- TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE)
-
-#define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \
-{ \
- int TH_TENSOR_APPLY_hasFinished = 0; \
- long TH_TENSOR_dim_index = 0; \
- __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \
-\
- while(!TH_TENSOR_APPLY_hasFinished) \
- { \
- /* Loop through the inner most region of the Tensor */ \
- for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \
- { \
- CODE \
- } \
- __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \
- } \
- THFree(TENSOR##_counter); \
-}
-
-#define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \
- TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE)
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h b/contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h
deleted file mode 100644
index 6727e1f7f..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h
+++ /dev/null
@@ -1,324 +0,0 @@
-#ifndef TH_TENSOR_DIM_APPLY_INC
-#define TH_TENSOR_DIM_APPLY_INC
-
-#define TH_TENSOR_DIM_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIMENSION, CODE) \
-{ \
- TYPE1 *TENSOR1##_data = NULL; \
- long TENSOR1##_stride = 0, TENSOR1##_size = 0; \
- TYPE2 *TENSOR2##_data = NULL; \
- long TENSOR2##_stride = 0, TENSOR2##_size = 0; \
- TYPE3 *TENSOR3##_data = NULL; \
- long TENSOR3##_stride = 0, TENSOR3##_size = 0; \
- long *TH_TENSOR_DIM_APPLY_counter = NULL; \
- int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
- int TH_TENSOR_DIM_APPLY_i; \
-\
- if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \
- THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, TENSOR1->nDimension); \
- int same_dims = 1; \
- if( TENSOR1->nDimension != TENSOR2->nDimension ) { \
- same_dims = 0; \
- } \
- if( TENSOR1->nDimension != TENSOR3->nDimension ) { \
- same_dims = 0; \
- } \
- if (same_dims == 0) { \
- THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \
- THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \
- THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \
- THError("inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same " \
- "number of dimensions", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str); \
- } \
- int shape_check_flag = 0; \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
- continue; \
- if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) \
- shape_check_flag = 1; \
- if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR3->size[TH_TENSOR_DIM_APPLY_i]) \
- shape_check_flag = 1; \
- } \
- \
- if (shape_check_flag == 1) { \
- THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \
- THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \
- THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \
- THError("Expected %s %s, %s %s and %s %s to have the same size in dimension %d", \
- #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, DIMENSION); \
- } \
-\
- TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
-\
- TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \
- TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \
- TENSOR1##_size = TENSOR1->size[DIMENSION]; \
-\
- TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \
- TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \
- TENSOR2##_size = TENSOR2->size[DIMENSION]; \
-\
- TENSOR3##_data = (TENSOR3)->storage->data+(TENSOR3)->storageOffset; \
- TENSOR3##_stride = (TENSOR3)->stride[DIMENSION]; \
- TENSOR3##_size = TENSOR3->size[DIMENSION]; \
-\
- while(!TH_TENSOR_DIM_APPLY_hasFinished) \
- { \
- CODE \
-\
- if(TENSOR1->nDimension == 1) \
- break; \
- \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \
- { \
- TH_TENSOR_DIM_APPLY_hasFinished = 1; \
- break; \
- } \
- continue; \
- } \
-\
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
- TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
- TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
- TENSOR3##_data += TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \
-\
- if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \
- { \
- TH_TENSOR_DIM_APPLY_hasFinished = 1; \
- break; \
- } \
- else \
- { \
- TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
- TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
- TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
- } \
- } \
- else \
- break; \
- } \
- } \
- THFree(TH_TENSOR_DIM_APPLY_counter); \
-}
-
-/**
- * Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor
- * and one for the second. The two tensors must have the same shape, other than at the
- * specified DIMENSION. This function makes it easy to store the output from reducing the
- * TENSOR at index. For example, in the sum example described below, we could instead do:
- *
- * long i = 0;
- * TYPE1 sum;
- *
- * for (i = 0; i < TENSOR1##_size; ++i) {
- * sum += TENSOR1##_data[i * TENSOR1##_stride]
- * }
- * *TENSOR2##_data = (TYPE2) sum;
- *
- * In particular, we guarantee that the offset into TENSOR2 will be what you would get if
- * you applied all of the index values used to generate the offset into TENSOR1.
- */
-#define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \
-{ \
- TYPE1 *TENSOR1##_data = NULL; \
- long TENSOR1##_stride = 0, TENSOR1##_size = 0; \
- TYPE2 *TENSOR2##_data = NULL; \
- long TENSOR2##_stride = 0, TENSOR2##_size = 0; \
- long *TH_TENSOR_DIM_APPLY_counter = NULL; \
- int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
- int TH_TENSOR_DIM_APPLY_i; \
-\
- if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \
- THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, TENSOR1->nDimension); \
- if( TENSOR1->nDimension != TENSOR2->nDimension ) { \
- THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \
- THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \
- THError("inconsistent tensor size, expected %s %s and %s %s to have the same " \
- "number of dimensions", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str); \
- } \
- int shape_check_flag = 0; \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
- continue; \
- if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) { \
- THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \
- THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \
- THError("Expected %s %s and %s %s to have the same size in dimension %d", \
- #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, DIMENSION); \
- } \
- } \
-\
- TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
-\
- TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \
- TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \
- TENSOR1##_size = TENSOR1->size[DIMENSION]; \
-\
- TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \
- TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \
- TENSOR2##_size = TENSOR2->size[DIMENSION]; \
-\
- while(!TH_TENSOR_DIM_APPLY_hasFinished) \
- { \
- CODE \
-\
- if(TENSOR1->nDimension == 1) \
- break; \
- \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \
- { \
- TH_TENSOR_DIM_APPLY_hasFinished = 1; \
- break; \
- } \
- continue; \
- } \
-\
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
- TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
- TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
-\
- if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \
- { \
- TH_TENSOR_DIM_APPLY_hasFinished = 1; \
- break; \
- } \
- else \
- { \
- TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \
- TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
- } \
- } \
- else \
- break; \
- } \
- } \
- THFree(TH_TENSOR_DIM_APPLY_counter); \
-}
-
-/**
- * The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored
- * at all sets of dimension values other than DIMENSION, such that we can get all the values at those
- * fixed indices for the various values at DIMENSION.
- *
- * Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the
- * pointer into storage will be at:
- *
- * A[0][0]
- * A[0][1]
- * A[0][2]
- * A[1][0]
- * A[1][1]
- * A[1][2]
- *
- * And at each point, we can access the data for each of the four elements of the Tensor via
- * TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do:
- *
- * long i = 0;
- * TYPE sum;
- * for (i = 0; i < TENSOR##_size; i++) {
- * sum += TENSOR##_data[i * TENSOR##_stride]
- * }
- *
- * Note that we don't have to have DIMENSION be the last tensor. If we have DIMENSION=1, then we will hit the
- * code (2x4) times, with pointer into the storage at:
- *
- * offset +
- * stride_0 * 0 + stride_2 * 0
- * stride_0 * 1 + stride_2 * 0
- * stride_0 * 0 + stride_2 * 1
- * stride_0 * 1 + stride_2 * 1
- * stride_0 * 0 + stride_2 * 2
- * stride_0 * 1 + stride_2 * 2
- * stride_0 * 0 + stride_2 * 3
- * stride_0 * 1 + stride_2 * 3
- *
- * So we can again sum over the values at DIMENSION with the other indices fixed.
- */
-#define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \
-{ \
- TYPE *TENSOR##_data = NULL; \
- long TENSOR##_stride = 0, TENSOR##_size = 0; \
- long *TH_TENSOR_DIM_APPLY_counter = NULL; \
- int TH_TENSOR_DIM_APPLY_hasFinished = 0; \
- int TH_TENSOR_DIM_APPLY_i; \
-\
- if( (DIMENSION < 0) || (DIMENSION >= TENSOR->nDimension) ) \
- THError("invalid dimension"); \
-\
- TENSOR##_data = (TENSOR)->storage->data+(TENSOR)->storageOffset; \
- TENSOR##_stride = (TENSOR)->stride[DIMENSION]; \
- TENSOR##_size = TENSOR->size[DIMENSION]; \
- /* Counter stores the indices into the Tensor at any time */ \
- TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR->nDimension)); \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
-\
- while(!TH_TENSOR_DIM_APPLY_hasFinished) \
- { \
- CODE \
-\
- if(TENSOR->nDimension == 1) \
- break; \
- \
- for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \
- { \
- /* Check if the index is equal to DIMENSION. We don't need to update the */ \
- /* offset if this is the case, and can consider the next index. However, */ \
- /* in the case that the DIMENSION is the last index in the Tensor, then */ \
- /* we have parsed the entire tensor and can exit */ \
- if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \
- { \
- if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \
- { \
- TH_TENSOR_DIM_APPLY_hasFinished = 1; \
- break; \
- } \
- continue; \
- } \
-\
- /* Bump the counter at this index, update the pointer */ \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \
- TENSOR##_data += TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \
-\
- if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR->size[TH_TENSOR_DIM_APPLY_i]) \
- { \
- /* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. If this is the last dimension, exit */ \
- if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \
- { \
- TH_TENSOR_DIM_APPLY_hasFinished = 1; \
- break; \
- } \
- else \
- { \
- /* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \
- TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \
- TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \
- } \
- } \
- else \
- break; \
- } \
- } \
- THFree(TH_TENSOR_DIM_APPLY_counter); \
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THTensorMacros.h b/contrib/lua-torch/torch7/lib/TH/THTensorMacros.h
deleted file mode 100644
index 15b67665e..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THTensorMacros.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef TH_TENSOR_MACROS_INC
-#define TH_TENSOR_MACROS_INC
-
-/* fast method to access to tensor data */
-
-#define THTensor_fastGet1d(self, x0) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]])
-
-#define THTensor_fastGet2d(self, x0, x1) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]])
-
-#define THTensor_fastGet3d(self, x0, x1, x2) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]])
-
-#define THTensor_fastGet4d(self, x0, x1, x2, x3) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]])
-
-#define THTensor_fastSet1d(self, x0, value) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]] = value)
-
-#define THTensor_fastSet2d(self, x0, x1, value) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]] = value)
-
-#define THTensor_fastSet3d(self, x0, x1, x2, value) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]] = value)
-
-#define THTensor_fastSet4d(self, x0, x1, x2, x3, value) \
- (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]] = value)
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/THVector.c b/contrib/lua-torch/torch7/lib/TH/THVector.c
deleted file mode 100644
index 441057884..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THVector.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#include "THVector.h"
-
-#include "generic/simd/simd.h"
-
-#ifdef __NEON__
-#include "vector/NEON.c"
-#endif
-
-#ifdef __PPC64__
-#include "vector/VSX.c"
-#endif
-
-#if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
-#include "vector/SSE.c"
-#endif
-
-#if defined(USE_AVX)
-#include "vector/AVX.h"
-#endif
-
-#if defined(USE_AVX2)
-#include "vector/AVX2.h"
-#endif
-
-#include "generic/THVectorDefault.c"
-#include "THGenerateAllTypes.h"
-
-#include "generic/THVectorDispatch.c"
-#include "THGenerateAllTypes.h"
diff --git a/contrib/lua-torch/torch7/lib/TH/THVector.h b/contrib/lua-torch/torch7/lib/TH/THVector.h
deleted file mode 100644
index e29917b93..000000000
--- a/contrib/lua-torch/torch7/lib/TH/THVector.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef TH_VECTOR_INC
-#define TH_VECTOR_INC
-
-#include "THGeneral.h"
-
-#define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME)
-
-/* We are going to use dynamic dispatch, and want only to generate declarations
- * of the vector functions */
-#include "generic/THVector.h"
-#include "THGenerateAllTypes.h"
-
-#endif // TH_VECTOR_INC
diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake
deleted file mode 100644
index 2dcb2a24f..000000000
--- a/contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake
+++ /dev/null
@@ -1,76 +0,0 @@
-# Check if the processor is an ARM and if Neon instruction are available on the machine where
-# the project is compiled.
-
-IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
- EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
-
- #neon instruction can be found on the majority part of modern ARM processor
- STRING(REGEX REPLACE "^.*(neon).*$" "\\1" NEON_THERE ${CPUINFO})
- STRING(COMPARE EQUAL "neon" "${NEON_THERE}" NEON_TRUE)
- IF (NEON_TRUE)
- set(NEON_FOUND true CACHE BOOL "NEON available on host")
- ELSE (NEON_TRUE)
- set(NEON_FOUND false CACHE BOOL "NEON available on host")
- ENDIF (NEON_TRUE)
-
- # on ARMv8, neon is inherit and instead listed as 'asimd' in /proc/cpuinfo
- STRING(REGEX REPLACE "^.*(asimd).*$" "\\1" ASIMD_THERE ${CPUINFO})
- STRING(COMPARE EQUAL "asimd" "${ASIMD_THERE}" ASIMD_TRUE)
- IF (ASIMD_TRUE)
- set(ASIMD_FOUND true CACHE BOOL "ASIMD/NEON available on host")
- ELSE (ASIMD_TRUE)
- set(ASIMD_FOUND false CACHE BOOL "ASIMD/NEON available on host")
- ENDIF (ASIMD_TRUE)
-
- #Find the processor type (for now OMAP3 or OMAP4)
- STRING(REGEX REPLACE "^.*(OMAP3).*$" "\\1" OMAP3_THERE ${CPUINFO})
- STRING(COMPARE EQUAL "OMAP3" "${OMAP3_THERE}" OMAP3_TRUE)
- IF (OMAP3_TRUE)
- set(CORTEXA8_FOUND true CACHE BOOL "OMAP3 available on host")
- ELSE (OMAP3_TRUE)
- set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 available on host")
- ENDIF (OMAP3_TRUE)
-
- #Find the processor type (for now OMAP3 or OMAP4)
- STRING(REGEX REPLACE "^.*(OMAP4).*$" "\\1" OMAP4_THERE ${CPUINFO})
- STRING(COMPARE EQUAL "OMAP4" "${OMAP4_THERE}" OMAP4_TRUE)
- IF (OMAP4_TRUE)
- set(CORTEXA9_FOUND true CACHE BOOL "OMAP4 available on host")
- ELSE (OMAP4_TRUE)
- set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 available on host")
- ENDIF (OMAP4_TRUE)
-
-ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
- EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
- CPUINFO)
-
- #neon instruction can be found on the majority part of modern ARM processor
- STRING(REGEX REPLACE "^.*(neon).*$" "\\1" NEON_THERE ${CPUINFO})
- STRING(COMPARE EQUAL "neon" "${NEON_THERE}" NEON_TRUE)
- IF (NEON_TRUE)
- set(NEON_FOUND true CACHE BOOL "NEON available on host")
- ELSE (NEON_TRUE)
- set(NEON_FOUND false CACHE BOOL "NEON available on host")
- ENDIF (NEON_TRUE)
-
-ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
- # TODO
- set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 not available on host")
- set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 not available on host")
- set(NEON_FOUND false CACHE BOOL "NEON not available on host")
-ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
- set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 not available on host")
- set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 not available on host")
- set(NEON_FOUND false CACHE BOOL "NEON not available on host")
-ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
-
-if(NOT NEON_FOUND)
- MESSAGE(STATUS "Could not find hardware support for NEON on this machine.")
-endif(NOT NEON_FOUND)
-if(NOT CORTEXA8_FOUND)
- MESSAGE(STATUS "No OMAP3 processor on this machine.")
-endif(NOT CORTEXA8_FOUND)
-if(NOT CORTEXA9_FOUND)
- MESSAGE(STATUS "No OMAP4 processor on this machine.")
-endif(NOT CORTEXA9_FOUND)
-mark_as_advanced(NEON_FOUND)
diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake
deleted file mode 100644
index 1f254d231..000000000
--- a/contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake
+++ /dev/null
@@ -1,309 +0,0 @@
-# - Find BLAS library
-# This module finds an installed fortran library that implements the BLAS
-# linear-algebra interface (see http://www.netlib.org/blas/).
-# The list of libraries searched for is taken
-# from the autoconf macro file, acx_blas.m4 (distributed at
-# http://ac-archive.sourceforge.net/ac-archive/acx_blas.html).
-#
-# This module sets the following variables:
-# BLAS_FOUND - set to true if a library implementing the BLAS interface is found.
-# BLAS_INFO - name of the detected BLAS library.
-# BLAS_F2C - set to true if following the f2c return convention
-# BLAS_LIBRARIES - list of libraries to link against to use BLAS
-# BLAS_INCLUDE_DIR - include directory
-
-# Do nothing is BLAS was found before
-IF(NOT BLAS_FOUND)
-
-SET(BLAS_LIBRARIES)
-SET(BLAS_INCLUDE_DIR)
-SET(BLAS_INFO)
-SET(BLAS_F2C)
-
-SET(WITH_BLAS "" CACHE STRING "Blas type [mkl/open/goto/acml/atlas/accelerate/veclib/generic]")
-
-# Old FindBlas
-INCLUDE(CheckCSourceRuns)
-INCLUDE(CheckFortranFunctionExists)
-
-MACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list)
- # This macro checks for the existence of the combination of fortran libraries
- # given by _list. If the combination is found, this macro checks (using the
- # Check_Fortran_Function_Exists macro) whether can link against that library
- # combination using the name of a routine given by _name using the linker
- # flags given by _flags. If the combination of libraries is found and passes
- # the link test, LIBRARIES is set to the list of complete library paths that
- # have been found. Otherwise, LIBRARIES is set to NOTFOUND.
- # N.B. _prefix is the prefix applied to the names of all cached variables that
- # are generated internally and marked advanced by this macro.
-
- set(__list)
- foreach(_elem ${_list})
- if(__list)
- set(__list "${__list} - ${_elem}")
- else(__list)
- set(__list "${_elem}")
- endif(__list)
- endforeach(_elem)
- message(STATUS "Checking for [${__list}]")
-
- set(_libraries_work TRUE)
- set(${LIBRARIES})
- set(_combined_name)
- foreach(_library ${_list})
- set(_combined_name ${_combined_name}_${_library})
- if(_libraries_work)
- if ( WIN32 )
- find_library(${_prefix}_${_library}_LIBRARY
- NAMES ${_library}
- PATHS ENV LIB
- PATHS ENV PATH )
- endif ( WIN32 )
- if ( APPLE )
- find_library(${_prefix}_${_library}_LIBRARY
- NAMES ${_library}
- PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64
- ENV DYLD_LIBRARY_PATH )
- else ( APPLE )
- find_library(${_prefix}_${_library}_LIBRARY
- NAMES ${_library}
- PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64
- ENV LD_LIBRARY_PATH )
- endif( APPLE )
- mark_as_advanced(${_prefix}_${_library}_LIBRARY)
- set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
- set(_libraries_work ${${_prefix}_${_library}_LIBRARY})
- MESSAGE(STATUS " Library ${_library}: ${${_prefix}_${_library}_LIBRARY}")
- endif(_libraries_work)
- endforeach(_library ${_list})
- if(_libraries_work)
- # Test this combination of libraries.
- set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}})
- if (CMAKE_Fortran_COMPILER_WORKS)
- check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS)
- else (CMAKE_Fortran_COMPILER_WORKS)
- check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS)
- endif (CMAKE_Fortran_COMPILER_WORKS)
- set(CMAKE_REQUIRED_LIBRARIES)
- mark_as_advanced(${_prefix}${_combined_name}_WORKS)
- set(_libraries_work ${${_prefix}${_combined_name}_WORKS})
- endif(_libraries_work)
- if(NOT _libraries_work)
- set(${LIBRARIES} NOTFOUND)
- endif(NOT _libraries_work)
-endmacro(Check_Fortran_Libraries)
-
-# Intel MKL?
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "mkl")))
- FIND_PACKAGE(MKL)
- IF(MKL_FOUND)
- SET(BLAS_INFO "mkl")
- SET(BLAS_LIBRARIES ${MKL_LIBRARIES})
- SET(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR})
- SET(BLAS_VERSION ${MKL_VERSION})
- ENDIF(MKL_FOUND)
-endif()
-
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "openblas")
- if(BLAS_LIBRARIES)
- set(BLAS_INFO "open")
- endif(BLAS_LIBRARIES)
-endif()
-
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "openblas;pthread")
- if(BLAS_LIBRARIES)
- set(BLAS_INFO "open")
- endif(BLAS_LIBRARIES)
-endif()
-
-if((NOT BLAS_LIBRARIES) AND (WIN32)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "libopenblas")
- if(BLAS_LIBRARIES)
- set(BLAS_INFO "open")
- endif(BLAS_LIBRARIES)
-endif()
-
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "goto")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "goto2;gfortran")
- if (BLAS_LIBRARIES)
- set(BLAS_INFO "goto")
- endif (BLAS_LIBRARIES)
-endif()
-
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "goto")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "goto2;gfortran;pthread")
- if (BLAS_LIBRARIES)
- set(BLAS_INFO "goto")
- endif (BLAS_LIBRARIES)
-endif()
-
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "acml")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "acml;gfortran")
- if (BLAS_LIBRARIES)
- set(BLAS_INFO "acml")
- endif (BLAS_LIBRARIES)
-endif()
-
-# Apple BLAS library?
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "accelerate")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "Accelerate")
- if (BLAS_LIBRARIES)
- set(BLAS_INFO "accelerate")
- set(BLAS_IS_ACCELERATE 1)
- endif (BLAS_LIBRARIES)
-endif()
-
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "veclib")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "vecLib")
- if (BLAS_LIBRARIES)
- set(BLAS_INFO "veclib")
- endif (BLAS_LIBRARIES)
-endif()
-
-# BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "atlas")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "ptf77blas;atlas;gfortran")
- if (BLAS_LIBRARIES)
- set(BLAS_INFO "atlas")
- endif (BLAS_LIBRARIES)
-endif()
-
-# Generic BLAS library?
-if((NOT BLAS_LIBRARIES)
- AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "generic")))
- check_fortran_libraries(
- BLAS_LIBRARIES
- BLAS
- sgemm
- ""
- "blas")
- if (BLAS_LIBRARIES)
- check_fortran_libraries(
- TMP_BLAS_LIBRARIES
- TMP_BLAS
- openblas_get_num_threads
- ""
- "blas")
- if (TMP_BLAS_LIBRARIES)
- set(BLAS_INFO "open")
- else()
- set(BLAS_INFO "generic")
- endif()
- endif (BLAS_LIBRARIES)
-endif()
-
-# Determine if blas was compiled with the f2c conventions
-IF (BLAS_LIBRARIES)
- SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
- CHECK_C_SOURCE_RUNS("
-#include <stdlib.h>
-#include <stdio.h>
-float x[4] = { 1, 2, 3, 4 };
-float y[4] = { .1, .01, .001, .0001 };
-int four = 4;
-int one = 1;
-extern double sdot_();
-int main() {
- int i;
- double r = sdot_(&four, x, &one, y, &one);
- exit((float)r != (float).1234);
-}" BLAS_F2C_DOUBLE_WORKS )
- CHECK_C_SOURCE_RUNS("
-#include <stdlib.h>
-#include <stdio.h>
-float x[4] = { 1, 2, 3, 4 };
-float y[4] = { .1, .01, .001, .0001 };
-int four = 4;
-int one = 1;
-extern float sdot_();
-int main() {
- int i;
- double r = sdot_(&four, x, &one, y, &one);
- exit((float)r != (float).1234);
-}" BLAS_F2C_FLOAT_WORKS )
- IF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
- MESSAGE(STATUS "This BLAS uses the F2C return conventions")
- SET(BLAS_F2C TRUE)
- ELSE (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
- SET(BLAS_F2C FALSE)
- ENDIF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)
-ENDIF(BLAS_LIBRARIES)
-
-# epilogue
-
-if(BLAS_LIBRARIES)
- set(BLAS_FOUND TRUE)
-else(BLAS_LIBRARIES)
- set(BLAS_FOUND FALSE)
-endif(BLAS_LIBRARIES)
-
-IF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED)
- message(FATAL_ERROR "Cannot find a library with BLAS API. Please specify library location.")
-ENDIF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED)
-IF(NOT BLAS_FIND_QUIETLY)
- IF(BLAS_FOUND)
- MESSAGE(STATUS "Found a library with BLAS API (${BLAS_INFO}).")
- ELSE(BLAS_FOUND)
- MESSAGE(STATUS "Cannot find a library with BLAS API. Not using BLAS.")
- ENDIF(BLAS_FOUND)
-ENDIF(NOT BLAS_FIND_QUIETLY)
-
-# Do nothing is BLAS was found before
-ENDIF(NOT BLAS_FOUND)
diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake
deleted file mode 100644
index 9eca0730f..000000000
--- a/contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake
+++ /dev/null
@@ -1,190 +0,0 @@
-# - Find LAPACK library
-# This module finds an installed fortran library that implements the LAPACK
-# linear-algebra interface (see http://www.netlib.org/lapack/).
-#
-# The approach follows that taken for the autoconf macro file, acx_lapack.m4
-# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html).
-#
-# This module sets the following variables:
-# LAPACK_FOUND - set to true if a library implementing the LAPACK interface is found
-# LAPACK_LIBRARIES - list of libraries (using full path name) for LAPACK
-
-# Note: I do not think it is a good idea to mixup different BLAS/LAPACK versions
-# Hence, this script wants to find a Lapack library matching your Blas library
-
-# Do nothing if LAPACK was found before
-IF(NOT LAPACK_FOUND)
-
-SET(LAPACK_LIBRARIES)
-SET(LAPACK_INFO)
-
-IF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
- FIND_PACKAGE(BLAS)
-ELSE(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
- FIND_PACKAGE(BLAS REQUIRED)
-ENDIF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)
-
-# Old search lapack script
-include(CheckFortranFunctionExists)
-
-macro(Check_Lapack_Libraries LIBRARIES _prefix _name _flags _list _blas)
- # This macro checks for the existence of the combination of fortran libraries
- # given by _list. If the combination is found, this macro checks (using the
- # Check_Fortran_Function_Exists macro) whether can link against that library
- # combination using the name of a routine given by _name using the linker
- # flags given by _flags. If the combination of libraries is found and passes
- # the link test, LIBRARIES is set to the list of complete library paths that
- # have been found. Otherwise, LIBRARIES is set to FALSE.
- # N.B. _prefix is the prefix applied to the names of all cached variables that
- # are generated internally and marked advanced by this macro.
- set(_libraries_work TRUE)
- set(${LIBRARIES})
- set(_combined_name)
- foreach(_library ${_list})
- set(_combined_name ${_combined_name}_${_library})
- if(_libraries_work)
- if (WIN32)
- find_library(${_prefix}_${_library}_LIBRARY
- NAMES ${_library} PATHS ENV LIB PATHS ENV PATH)
- else (WIN32)
- if(APPLE)
- find_library(${_prefix}_${_library}_LIBRARY
- NAMES ${_library}
- PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64
- ENV DYLD_LIBRARY_PATH)
- else(APPLE)
- find_library(${_prefix}_${_library}_LIBRARY
- NAMES ${_library}
- PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64
- ENV LD_LIBRARY_PATH)
- endif(APPLE)
- endif(WIN32)
- mark_as_advanced(${_prefix}_${_library}_LIBRARY)
- set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
- set(_libraries_work ${${_prefix}_${_library}_LIBRARY})
- endif(_libraries_work)
- endforeach(_library ${_list})
- if(_libraries_work)
- # Test this combination of libraries.
- set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas})
- if (CMAKE_Fortran_COMPILER_WORKS)
- check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS)
- else (CMAKE_Fortran_COMPILER_WORKS)
- check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS)
- endif (CMAKE_Fortran_COMPILER_WORKS)
- set(CMAKE_REQUIRED_LIBRARIES)
- mark_as_advanced(${_prefix}${_combined_name}_WORKS)
- set(_libraries_work ${${_prefix}${_combined_name}_WORKS})
- endif(_libraries_work)
- if(NOT _libraries_work)
- set(${LIBRARIES} FALSE)
- endif(NOT _libraries_work)
-endmacro(Check_Lapack_Libraries)
-
-
-if(BLAS_FOUND)
-
- # Intel MKL
- IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "mkl"))
- IF(MKL_LAPACK_LIBRARIES)
- SET(LAPACK_LIBRARIES ${MKL_LAPACK_LIBRARIES} ${MKL_LIBRARIES})
- ELSE(MKL_LAPACK_LIBRARIES)
- SET(LAPACK_LIBRARIES ${MKL_LIBRARIES})
- ENDIF(MKL_LAPACK_LIBRARIES)
- SET(LAPACK_INCLUDE_DIR ${MKL_INCLUDE_DIR})
- SET(LAPACK_INFO "mkl")
- ENDIF()
-
- # OpenBlas
- IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "open"))
- SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
- check_function_exists("cheev_" OPEN_LAPACK_WORKS)
- if(OPEN_LAPACK_WORKS)
- SET(LAPACK_INFO "open")
- else()
- message(STATUS "It seems OpenBlas has not been compiled with Lapack support")
- endif()
- endif()
-
- # GotoBlas
- IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "goto"))
- SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
- check_function_exists("cheev_" GOTO_LAPACK_WORKS)
- if(GOTO_LAPACK_WORKS)
- SET(LAPACK_INFO "goto")
- else()
- message(STATUS "It seems GotoBlas has not been compiled with Lapack support")
- endif()
- endif()
-
- # ACML
- IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "acml"))
- SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
- check_function_exists("cheev_" ACML_LAPACK_WORKS)
- if(ACML_LAPACK_WORKS)
- SET(LAPACK_INFO "acml")
- else()
- message(STATUS "Strangely, this ACML library does not support Lapack?!")
- endif()
- endif()
-
- # Accelerate
- IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "accelerate"))
- SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
- check_function_exists("cheev_" ACCELERATE_LAPACK_WORKS)
- if(ACCELERATE_LAPACK_WORKS)
- SET(LAPACK_INFO "accelerate")
- else()
- message(STATUS "Strangely, this Accelerate library does not support Lapack?!")
- endif()
- endif()
-
- # vecLib
- IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "veclib"))
- SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})
- check_function_exists("cheev_" VECLIB_LAPACK_WORKS)
- if(VECLIB_LAPACK_WORKS)
- SET(LAPACK_INFO "veclib")
- else()
- message(STATUS "Strangely, this vecLib library does not support Lapack?!")
- endif()
- endif()
-
- # Generic LAPACK library?
- IF((NOT LAPACK_INFO) AND ((BLAS_INFO STREQUAL "generic") OR (BLAS_INFO STREQUAL "open")))
- check_lapack_libraries(
- LAPACK_LIBRARIES
- LAPACK
- cheev
- ""
- "lapack"
- "${BLAS_LIBRARIES}"
- )
- if(LAPACK_LIBRARIES)
- SET(LAPACK_INFO "generic")
- endif(LAPACK_LIBRARIES)
- endif()
-
-else(BLAS_FOUND)
- message(STATUS "LAPACK requires BLAS")
-endif(BLAS_FOUND)
-
-if(LAPACK_INFO)
- set(LAPACK_FOUND TRUE)
-else(LAPACK_INFO)
- set(LAPACK_FOUND FALSE)
-endif(LAPACK_INFO)
-
-IF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED)
- message(FATAL_ERROR "Cannot find a library with LAPACK API. Please specify library location.")
-ENDIF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED)
-IF(NOT LAPACK_FIND_QUIETLY)
- IF(LAPACK_FOUND)
- MESSAGE(STATUS "Found a library with LAPACK API. (${LAPACK_INFO})")
- ELSE(LAPACK_FOUND)
- MESSAGE(STATUS "Cannot find a library with LAPACK API. Not using LAPACK.")
- ENDIF(LAPACK_FOUND)
-ENDIF(NOT LAPACK_FIND_QUIETLY)
-
-# Do nothing if LAPACK was found before
-ENDIF(NOT LAPACK_FOUND)
diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake
deleted file mode 100644
index 08b450985..000000000
--- a/contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake
+++ /dev/null
@@ -1,272 +0,0 @@
-# - Find INTEL MKL library
-#
-# This module finds the Intel Mkl libraries.
-#
-# This module sets the following variables:
-# MKL_FOUND - set to true if a library implementing the CBLAS interface is found
-# MKL_VERSION - best guess
-# MKL_INCLUDE_DIR - path to include dir.
-# MKL_LIBRARIES - list of libraries for base mkl
-# MKL_LAPACK_LIBRARIES - list of libraries to add for lapack
-# MKL_SCALAPACK_LIBRARIES - list of libraries to add for scalapack
-# MKL_SOLVER_LIBRARIES - list of libraries to add for the solvers
-# MKL_CDFT_LIBRARIES - list of libraries to add for the solvers
-
-
-# Do nothing if MKL_FOUND was set before!
-IF (NOT MKL_FOUND)
-
-SET(MKL_VERSION)
-SET(MKL_INCLUDE_DIR)
-SET(MKL_LIBRARIES)
-SET(MKL_LAPACK_LIBRARIES)
-SET(MKL_SCALAPACK_LIBRARIES)
-SET(MKL_SOLVER_LIBRARIES)
-SET(MKL_CDFT_LIBRARIES)
-
-# Includes
-INCLUDE(CheckTypeSize)
-INCLUDE(CheckFunctionExists)
-
-# Intel Compiler Suite
-SET(INTEL_COMPILER_DIR CACHE STRING
- "Root directory of the Intel Compiler Suite (contains ipp, mkl, etc.)")
-SET(INTEL_MKL_DIR CACHE STRING
- "Root directory of the Intel MKL (standalone)")
-SET(INTEL_MKL_SEQUENTIAL OFF CACHE BOOL
- "Force using the sequential (non threaded) libraries")
-
-# Checks
-CHECK_TYPE_SIZE("void*" SIZE_OF_VOIDP)
-IF ("${SIZE_OF_VOIDP}" EQUAL 8)
- SET(mklvers "em64t")
- SET(iccvers "intel64")
- SET(mkl64s "_lp64")
-ELSE ("${SIZE_OF_VOIDP}" EQUAL 8)
- SET(mklvers "32")
- SET(iccvers "ia32")
- SET(mkl64s)
-ENDIF ("${SIZE_OF_VOIDP}" EQUAL 8)
-IF(CMAKE_COMPILER_IS_GNUCC)
- SET(mklthreads "mkl_gnu_thread" "mkl_intel_thread")
- SET(mklifaces "gf" "intel")
- SET(mklrtls "iomp5")
-ELSE(CMAKE_COMPILER_IS_GNUCC)
- SET(mklthreads "mkl_intel_thread")
- SET(mklifaces "intel")
- SET(mklrtls "iomp5" "guide")
- IF (MSVC)
- SET(mklrtls "libiomp5md")
- ENDIF (MSVC)
-ENDIF (CMAKE_COMPILER_IS_GNUCC)
-
-# Kernel libraries dynamically loaded
-SET(mklkerlibs "mc" "mc3" "nc" "p4n" "p4m" "p4m3" "p4p" "def")
-SET(mklseq)
-
-
-
-# Paths
-SET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH})
-SET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH})
-IF (INTEL_COMPILER_DIR)
- # TODO: diagnostic if dir does not exist
- SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}
- "${INTEL_COMPILER_DIR}/lib/${iccvers}")
- IF (NOT INTEL_MKL_DIR)
- SET(INTEL_MKL_DIR "${INTEL_COMPILER_DIR}/mkl")
- ENDIF (NOT INTEL_MKL_DIR)
-ENDIF (INTEL_COMPILER_DIR)
-IF (INTEL_MKL_DIR)
- # TODO: diagnostic if dir does not exist
- SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH}
- "${INTEL_MKL_DIR}/include")
- SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}
- "${INTEL_MKL_DIR}/lib/${mklvers}")
- IF (MSVC)
- SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}
- "${INTEL_MKL_DIR}/lib/${iccvers}")
- ENDIF (MSVC)
-ENDIF (INTEL_MKL_DIR)
-
-# Try linking multiple libs
-MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)
- # This macro checks for the existence of the combination of libraries given by _list.
- # If the combination is found, this macro whether we can link against that library
- # combination using the name of a routine given by _name using the linker
- # flags given by _flags. If the combination of libraries is found and passes
- # the link test, LIBRARIES is set to the list of complete library paths that
- # have been found. Otherwise, LIBRARIES is set to FALSE.
- # N.B. _prefix is the prefix applied to the names of all cached variables that
- # are generated internally and marked advanced by this macro.
- SET(_prefix "${LIBRARIES}")
- # start checking
- SET(_libraries_work TRUE)
- SET(${LIBRARIES})
- SET(_combined_name)
- SET(_paths)
- set(__list)
- foreach(_elem ${_list})
- if(__list)
- set(__list "${__list} - ${_elem}")
- else(__list)
- set(__list "${_elem}")
- endif(__list)
- endforeach(_elem)
- message(STATUS "Checking for [${__list}]")
- FOREACH(_library ${_list})
- SET(_combined_name ${_combined_name}_${_library})
- IF(_libraries_work)
- FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library})
- MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY)
- SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})
- SET(_libraries_work ${${_prefix}_${_library}_LIBRARY})
- IF(${_prefix}_${_library}_LIBRARY)
- MESSAGE(STATUS " Library ${_library}: ${${_prefix}_${_library}_LIBRARY}")
- ELSE(${_prefix}_${_library}_LIBRARY)
- MESSAGE(STATUS " Library ${_library}: not found")
- ENDIF(${_prefix}_${_library}_LIBRARY)
- ENDIF(_libraries_work)
- ENDFOREACH(_library ${_list})
- # Test this combination of libraries.
- IF(_libraries_work)
- SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}})
- CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS)
- SET(CMAKE_REQUIRED_LIBRARIES)
- MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS)
- SET(_libraries_work ${${_prefix}${_combined_name}_WORKS})
- ENDIF(_libraries_work)
- # Fin
- IF(_libraries_work)
- ELSE (_libraries_work)
- SET(${LIBRARIES})
- MARK_AS_ADVANCED(${LIBRARIES})
- ENDIF(_libraries_work)
-ENDMACRO(CHECK_ALL_LIBRARIES)
-
-if(WIN32)
- set(mkl_m "")
-else(WIN32)
- set(mkl_m "m")
-endif(WIN32)
-
-
-# Check for version 10/11
-IF (NOT MKL_LIBRARIES)
- SET(MKL_VERSION 1011)
-ENDIF (NOT MKL_LIBRARIES)
-FOREACH(mklrtl ${mklrtls} "")
- FOREACH(mkliface ${mklifaces})
- FOREACH(mkl64 ${mkl64s} "")
- FOREACH(mklthread ${mklthreads})
- IF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL)
- CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm
- "mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m}" "")
- ENDIF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL)
- ENDFOREACH(mklthread)
- ENDFOREACH(mkl64)
- ENDFOREACH(mkliface)
-ENDFOREACH(mklrtl)
-FOREACH(mklrtl ${mklrtls} "")
- FOREACH(mkliface ${mklifaces})
- FOREACH(mkl64 ${mkl64s} "")
- IF (NOT MKL_LIBRARIES)
- CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm
- "mkl_${mkliface}${mkl64};mkl_sequential;mkl_core;${mkl_m}" "")
- IF (MKL_LIBRARIES)
- SET(mklseq "_sequential")
- ENDIF (MKL_LIBRARIES)
- ENDIF (NOT MKL_LIBRARIES)
- ENDFOREACH(mkl64)
- ENDFOREACH(mkliface)
-ENDFOREACH(mklrtl)
-FOREACH(mklrtl ${mklrtls} "")
- FOREACH(mkliface ${mklifaces})
- FOREACH(mkl64 ${mkl64s} "")
- FOREACH(mklthread ${mklthreads})
- IF (NOT MKL_LIBRARIES)
- CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm
- "mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m}" "")
- ENDIF (NOT MKL_LIBRARIES)
- ENDFOREACH(mklthread)
- ENDFOREACH(mkl64)
- ENDFOREACH(mkliface)
-ENDFOREACH(mklrtl)
-
-# Check for older versions
-IF (NOT MKL_LIBRARIES)
- SET(MKL_VERSION 900)
- CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm
- "mkl;guide;pthread;m" "")
-ENDIF (NOT MKL_LIBRARIES)
-
-# Include files
-IF (MKL_LIBRARIES)
- FIND_PATH(MKL_INCLUDE_DIR "mkl_cblas.h")
- MARK_AS_ADVANCED(MKL_INCLUDE_DIR)
-ENDIF (MKL_LIBRARIES)
-
-# Other libraries
-IF (MKL_LIBRARIES)
- FOREACH(mkl64 ${mkl64s} "_core" "")
- FOREACH(mkls ${mklseq} "")
- IF (NOT MKL_LAPACK_LIBRARIES)
- FIND_LIBRARY(MKL_LAPACK_LIBRARIES NAMES "mkl_lapack${mkl64}${mkls}")
- MARK_AS_ADVANCED(MKL_LAPACK_LIBRARIES)
- ENDIF (NOT MKL_LAPACK_LIBRARIES)
- IF (NOT MKL_SCALAPACK_LIBRARIES)
- FIND_LIBRARY(MKL_SCALAPACK_LIBRARIES NAMES "mkl_scalapack${mkl64}${mkls}")
- MARK_AS_ADVANCED(MKL_SCALAPACK_LIBRARIES)
- ENDIF (NOT MKL_SCALAPACK_LIBRARIES)
- IF (NOT MKL_SOLVER_LIBRARIES)
- FIND_LIBRARY(MKL_SOLVER_LIBRARIES NAMES "mkl_solver${mkl64}${mkls}")
- MARK_AS_ADVANCED(MKL_SOLVER_LIBRARIES)
- ENDIF (NOT MKL_SOLVER_LIBRARIES)
- IF (NOT MKL_CDFT_LIBRARIES)
- FIND_LIBRARY(MKL_CDFT_LIBRARIES NAMES "mkl_cdft${mkl64}${mkls}")
- MARK_AS_ADVANCED(MKL_CDFT_LIBRARIES)
- ENDIF (NOT MKL_CDFT_LIBRARIES)
- ENDFOREACH(mkls)
- ENDFOREACH(mkl64)
-ENDIF (MKL_LIBRARIES)
-
-# LibIRC: intel compiler always links this;
-# gcc does not; but mkl kernels sometimes need it.
-IF (MKL_LIBRARIES)
- IF (CMAKE_COMPILER_IS_GNUCC)
- FIND_LIBRARY(MKL_KERNEL_libirc "irc")
- ELSEIF (CMAKE_C_COMPILER_ID AND NOT CMAKE_C_COMPILER_ID STREQUAL "Intel")
- FIND_LIBRARY(MKL_KERNEL_libirc "irc")
- ENDIF (CMAKE_COMPILER_IS_GNUCC)
- MARK_AS_ADVANCED(MKL_KERNEL_libirc)
- IF (MKL_KERNEL_libirc)
- SET(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_KERNEL_libirc})
- ENDIF (MKL_KERNEL_libirc)
-ENDIF (MKL_LIBRARIES)
-
-# Final
-SET(CMAKE_LIBRARY_PATH ${saved_CMAKE_LIBRARY_PATH})
-SET(CMAKE_INCLUDE_PATH ${saved_CMAKE_INCLUDE_PATH})
-IF (MKL_LIBRARIES)
- SET(MKL_FOUND TRUE)
-ELSE (MKL_LIBRARIES)
- SET(MKL_FOUND FALSE)
- SET(MKL_VERSION)
-ENDIF (MKL_LIBRARIES)
-
-# Standard termination
-IF(NOT MKL_FOUND AND MKL_FIND_REQUIRED)
- MESSAGE(FATAL_ERROR "MKL library not found. Please specify library location")
-ENDIF(NOT MKL_FOUND AND MKL_FIND_REQUIRED)
-IF(NOT MKL_FIND_QUIETLY)
- IF(MKL_FOUND)
- MESSAGE(STATUS "MKL library found")
- ELSE(MKL_FOUND)
- MESSAGE(STATUS "MKL library not found")
- ENDIF(MKL_FOUND)
-ENDIF(NOT MKL_FIND_QUIETLY)
-
-# Do nothing if MKL_FOUND was set before!
-ENDIF (NOT MKL_FOUND)
-
-
diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake
deleted file mode 100644
index a14abe8d4..000000000
--- a/contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake
+++ /dev/null
@@ -1,125 +0,0 @@
-INCLUDE(CheckCSourceRuns)
-INCLUDE(CheckCXXSourceRuns)
-
-SET(SSE1_CODE "
- #include <xmmintrin.h>
-
- int main()
- {
- __m128 a;
- float vals[4] = {0,0,0,0};
- a = _mm_loadu_ps(vals);
- return 0;
- }")
-
-SET(SSE2_CODE "
- #include <emmintrin.h>
-
- int main()
- {
- __m128d a;
- double vals[2] = {0,0};
- a = _mm_loadu_pd(vals);
- return 0;
- }")
-
-SET(SSE3_CODE "
- #include <pmmintrin.h>
-
- int main( )
- {
- const int vals[4] = {0,0,0,0};
- __m128i a;
- a = _mm_lddqu_si128( (const __m128i*)vals );
- return 0;
- }")
-
-SET(SSE4_1_CODE "
- #include <smmintrin.h>
-
- int main ()
- {
- __m128i a = {0,0,0,0}, b = {0,0,0,0};
- __m128i res = _mm_max_epi8(a, b);
-
- return 0;
- }
-")
-
-SET(SSE4_2_CODE "
- #include <nmmintrin.h>
-
- int main()
- {
- __m128i a = {0,0,0,0}, b = {0,0,0,0}, c = {0,0,0,0};
- c = _mm_cmpgt_epi64(a, b);
- return 0;
- }
-")
-
-SET(AVX_CODE "
- #include <immintrin.h>
-
- int main()
- {
- __m256 a;
- a = _mm256_set1_ps(0);
- return 0;
- }
-")
-
-SET(AVX2_CODE "
- #include <immintrin.h>
-
- int main()
- {
- __m256i a = {0};
- a = _mm256_abs_epi16(a);
- return 0;
- }
-")
-
-MACRO(CHECK_SSE lang type flags)
- SET(__FLAG_I 1)
- SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
- FOREACH(__FLAG ${flags})
- IF(NOT ${lang}_${type}_FOUND)
- SET(CMAKE_REQUIRED_FLAGS ${__FLAG})
- IF(lang STREQUAL "CXX")
- CHECK_CXX_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I})
- ELSE()
- CHECK_C_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I})
- ENDIF()
- IF(${lang}_HAS_${type}_${__FLAG_I})
- SET(${lang}_${type}_FOUND TRUE CACHE BOOL "${lang} ${type} support")
- SET(${lang}_${type}_FLAGS "${__FLAG}" CACHE STRING "${lang} ${type} flags")
- ENDIF()
- MATH(EXPR __FLAG_I "${__FLAG_I}+1")
- ENDIF()
- ENDFOREACH()
- SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
-
- IF(NOT ${lang}_${type}_FOUND)
- SET(${lang}_${type}_FOUND FALSE CACHE BOOL "${lang} ${type} support")
- SET(${lang}_${type}_FLAGS "" CACHE STRING "${lang} ${type} flags")
- ENDIF()
-
- MARK_AS_ADVANCED(${lang}_${type}_FOUND ${lang}_${type}_FLAGS)
-
-ENDMACRO()
-
-CHECK_SSE(C "SSE1" " ;-msse;/arch:SSE")
-CHECK_SSE(C "SSE2" " ;-msse2;/arch:SSE2")
-CHECK_SSE(C "SSE3" " ;-msse3;/arch:SSE3")
-CHECK_SSE(C "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4")
-CHECK_SSE(C "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4")
-CHECK_SSE(C "AVX" " ;-mavx;/arch:AVX")
-CHECK_SSE(C "AVX2" " ;-mavx2 -mfma;/arch:AVX2")
-
-CHECK_SSE(CXX "SSE1" " ;-msse;/arch:SSE")
-CHECK_SSE(CXX "SSE2" " ;-msse2;/arch:SSE2")
-CHECK_SSE(CXX "SSE3" " ;-msse3;/arch:SSE3")
-CHECK_SSE(CXX "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4")
-CHECK_SSE(CXX "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4")
-CHECK_SSE(CXX "AVX" " ;-mavx;/arch:AVX")
-CHECK_SSE(CXX "AVX2" " ;-mavx2 -mfma;/arch:AVX2")
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.c b/contrib/lua-torch/torch7/lib/TH/generic/THBlas.c
deleted file mode 100644
index b04931f34..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.c
+++ /dev/null
@@ -1,412 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THBlas.c"
-#else
-
-
-#ifdef BLAS_F2C
-# define ffloat double
-#else
-# define ffloat float
-#endif
-
-TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC void dscal_(int *n, double *a, double *x, int *incx);
-TH_EXTERNC void sscal_(int *n, float *a, float *x, int *incx);
-TH_EXTERNC void dcopy_(int *n, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC void scopy_(int *n, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC void daxpy_(int *n, double *a, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC void saxpy_(int *n, float *a, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC double ddot_(int *n, double *x, int *incx, double *y, int *incy);
-TH_EXTERNC ffloat sdot_(int *n, float *x, int *incx, float *y, int *incy);
-TH_EXTERNC void dgemv_(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy);
-TH_EXTERNC void sgemv_(char *trans, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy);
-TH_EXTERNC void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *a, int *lda);
-TH_EXTERNC void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *a, int *lda);
-TH_EXTERNC void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc);
-TH_EXTERNC void sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc);
-
-
-
-void THBlas_(swap)(long n, real *x, long incx, real *y, long incy)
-{
- if(n == 1)
- {
- incx = 1;
- incy = 1;
- }
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
- {
- int i_n = (int)n;
- int i_incx = (int)incx;
- int i_incy = (int)incy;
-
-#if defined(TH_REAL_IS_DOUBLE)
- dswap_(&i_n, x, &i_incx, y, &i_incy);
-#else
- sswap_(&i_n, x, &i_incx, y, &i_incy);
-#endif
- return;
- }
-#endif
- {
- long i;
- for(i = 0; i < n; i++)
- {
- real z = x[i*incx];
- x[i*incx] = y[i*incy];
- y[i*incy] = z;
- }
- }
-}
-
-void THBlas_(scal)(long n, real a, real *x, long incx)
-{
- if(n == 1)
- incx = 1;
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (n <= INT_MAX) && (incx <= INT_MAX) )
- {
- int i_n = (int)n;
- int i_incx = (int)incx;
-
-#if defined(TH_REAL_IS_DOUBLE)
- dscal_(&i_n, &a, x, &i_incx);
-#else
- sscal_(&i_n, &a, x, &i_incx);
-#endif
- return;
- }
-#endif
- {
- long i;
- for(i = 0; i < n; i++) {
- if (a == 0) {
- x[i*incx] = 0;
- } else {
- x[i*incx] *= a;
- }
- }
- }
-}
-
-void THBlas_(copy)(long n, real *x, long incx, real *y, long incy)
-{
- if(n == 1)
- {
- incx = 1;
- incy = 1;
- }
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
- {
- int i_n = (int)n;
- int i_incx = (int)incx;
- int i_incy = (int)incy;
-
-#if defined(TH_REAL_IS_DOUBLE)
- dcopy_(&i_n, x, &i_incx, y, &i_incy);
-#else
- scopy_(&i_n, x, &i_incx, y, &i_incy);
-#endif
- return;
- }
-#endif
- {
- long i;
- for(i = 0; i < n; i++)
- y[i*incy] = x[i*incx];
- }
-}
-
-void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy)
-{
- if(n == 1)
- {
- incx = 1;
- incy = 1;
- }
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
- {
- int i_n = (int)n;
- int i_incx = (int)incx;
- int i_incy = (int)incy;
-
-#if defined(TH_REAL_IS_DOUBLE)
- daxpy_(&i_n, &a, x, &i_incx, y, &i_incy);
-#else
- saxpy_(&i_n, &a, x, &i_incx, y, &i_incy);
-#endif
- return;
- }
-#endif
- {
- long i;
- for(i = 0; i < n; i++)
- y[i*incy] += a*x[i*incx];
- }
-}
-
-real THBlas_(dot)(long n, real *x, long incx, real *y, long incy)
-{
- if(n == 1)
- {
- incx = 1;
- incy = 1;
- }
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
- {
- int i_n = (int)n;
- int i_incx = (int)incx;
- int i_incy = (int)incy;
-
-#if defined(TH_REAL_IS_DOUBLE)
- return (real) ddot_(&i_n, x, &i_incx, y, &i_incy);
-#else
- return (real) sdot_(&i_n, x, &i_incx, y, &i_incy);
-#endif
- }
-#endif
- {
- long i;
- real sum = 0;
- for(i = 0; i < n; i++)
- sum += x[i*incx]*y[i*incy];
- return sum;
- }
-}
-
-void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy)
-{
- if(n == 1)
- lda = m;
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (m <= INT_MAX) && (n <= INT_MAX) &&
- (lda > 0) && (lda <= INT_MAX) &&
- (incx > 0) && (incx <= INT_MAX) &&
- (incy > 0) && (incy <= INT_MAX) )
- {
- int i_m = (int)m;
- int i_n = (int)n;
- int i_lda = (int)lda;
- int i_incx = (int)incx;
- int i_incy = (int)incy;
-
-#if defined(TH_REAL_IS_DOUBLE)
- dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);
-#else
- sgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);
-#endif
- return;
- }
-#endif
- {
- long i, j;
-
- if( (trans == 'T') || (trans == 't') )
- {
- for(i = 0; i < n; i++)
- {
- real sum = 0;
- real *row_ = a+lda*i;
- for(j = 0; j < m; j++)
- sum += x[j*incx]*row_[j];
- if (beta == 0)
- y[i*incy] = alpha*sum;
- else
- y[i*incy] = beta*y[i*incy] + alpha*sum;
- }
- }
- else
- {
- if(beta != 1)
- THBlas_(scal)(m, beta, y, incy);
-
- for(j = 0; j < n; j++)
- {
- real *column_ = a+lda*j;
- real z = alpha*x[j*incx];
- for(i = 0; i < m; i++)
- y[i*incy] += z*column_[i];
- }
- }
- }
-}
-
-void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda)
-{
- if(n == 1)
- lda = m;
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
- {
- int i_m = (int)m;
- int i_n = (int)n;
- int i_lda = (int)lda;
- int i_incx = (int)incx;
- int i_incy = (int)incy;
-
-#if defined(TH_REAL_IS_DOUBLE)
- dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);
-#else
- sger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);
-#endif
- return;
- }
-#endif
- {
- long i, j;
- for(j = 0; j < n; j++)
- {
- real *column_ = a+j*lda;
- real z = alpha*y[j*incy];
- for(i = 0; i < m; i++)
- column_[i] += z*x[i*incx] ;
- }
- }
-}
-
-void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc)
-{
- int transa_ = ((transa == 't') || (transa == 'T'));
- int transb_ = ((transb == 't') || (transb == 'T'));
-
- if(n == 1)
- ldc = m;
-
- if(transa_)
- {
- if(m == 1)
- lda = k;
- }
- else
- {
- if(k == 1)
- lda = m;
- }
-
- if(transb_)
- {
- if(k == 1)
- ldb = n;
- }
- else
- {
- if(n == 1)
- ldb = k;
- }
-
-#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
- if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= INT_MAX) )
- {
- int i_m = (int)m;
- int i_n = (int)n;
- int i_k = (int)k;
- int i_lda = (int)lda;
- int i_ldb = (int)ldb;
- int i_ldc = (int)ldc;
-
-#if defined(TH_REAL_IS_DOUBLE)
- dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);
-#else
- sgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);
-#endif
- return;
- }
-#endif
- {
- long i, j, l;
- if(!transa_ && !transb_)
- {
- real *a_ = a;
- for(i = 0; i < m; i++)
- {
- real *b_ = b;
- for(j = 0; j < n; j++)
- {
- real sum = 0;
- for(l = 0; l < k; l++)
- sum += a_[l*lda]*b_[l];
- b_ += ldb;
- if (beta == 0)
- c[j*ldc+i] = alpha*sum;
- else
- c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
- }
- a_++;
- }
- }
- else if(transa_ && !transb_)
- {
- real *a_ = a;
- for(i = 0; i < m; i++)
- {
- real *b_ = b;
- for(j = 0; j < n; j++)
- {
- real sum = 0;
- for(l = 0; l < k; l++)
- sum += a_[l]*b_[l];
- b_ += ldb;
- if (beta == 0)
- c[j*ldc+i] = alpha*sum;
- else
- c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
- }
- a_ += lda;
- }
- }
- else if(!transa_ && transb_)
- {
- real *a_ = a;
- for(i = 0; i < m; i++)
- {
- real *b_ = b;
- for(j = 0; j < n; j++)
- {
- real sum = 0;
- for(l = 0; l < k; l++)
- sum += a_[l*lda]*b_[l*ldb];
- b_++;
- if (beta == 0)
- c[j*ldc+i] = alpha*sum;
- else
- c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
- }
- a_++;
- }
- }
- else
- {
- real *a_ = a;
- for(i = 0; i < m; i++)
- {
- real *b_ = b;
- for(j = 0; j < n; j++)
- {
- real sum = 0;
- for(l = 0; l < k; l++)
- sum += a_[l]*b_[l*ldb];
- b_++;
- if (beta == 0)
- c[j*ldc+i] = alpha*sum;
- else
- c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;
- }
- a_ += lda;
- }
- }
- }
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.h b/contrib/lua-torch/torch7/lib/TH/generic/THBlas.h
deleted file mode 100644
index 9e14f5a84..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THBlas.h"
-#else
-
-/* Level 1 */
-TH_API void THBlas_(swap)(long n, real *x, long incx, real *y, long incy);
-TH_API void THBlas_(scal)(long n, real a, real *x, long incx);
-TH_API void THBlas_(copy)(long n, real *x, long incx, real *y, long incy);
-TH_API void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy);
-TH_API real THBlas_(dot)(long n, real *x, long incx, real *y, long incy);
-
-/* Level 2 */
-TH_API void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy);
-TH_API void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda);
-
-/* Level 3 */
-TH_API void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.c b/contrib/lua-torch/torch7/lib/TH/generic/THLapack.c
deleted file mode 100644
index 148ae26c4..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.c
+++ /dev/null
@@ -1,270 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THLapack.c"
-#else
-
-
-TH_EXTERNC void dgesv_(int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);
-TH_EXTERNC void sgesv_(int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info);
-TH_EXTERNC void dtrtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);
-TH_EXTERNC void strtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info);
-TH_EXTERNC void dgels_(char *trans, int *m, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, double *work, int *lwork, int *info);
-TH_EXTERNC void sgels_(char *trans, int *m, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, float *work, int *lwork, int *info);
-TH_EXTERNC void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info);
-TH_EXTERNC void ssyev_(char *jobz, char *uplo, int *n, float *a, int *lda, float *w, float *work, int *lwork, int *info);
-TH_EXTERNC void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double* vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info);
-TH_EXTERNC void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda, float *wr, float *wi, float* vl, int *ldvl, float *vr, int *ldvr, float *work, int *lwork, int *info);
-TH_EXTERNC void dgesvd_(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info);
-TH_EXTERNC void sgesvd_(char *jobu, char *jobvt, int *m, int *n, float *a, int *lda, float *s, float *u, int *ldu, float *vt, int *ldvt, float *work, int *lwork, int *info);
-TH_EXTERNC void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info);
-TH_EXTERNC void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info);
-TH_EXTERNC void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);
-TH_EXTERNC void sgetrs_(char *trans, int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info);
-TH_EXTERNC void dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);
-TH_EXTERNC void sgetri_(int *n, float *a, int *lda, int *ipiv, float *work, int *lwork, int *info);
-TH_EXTERNC void dpotrf_(char *uplo, int *n, double *a, int *lda, int *info);
-TH_EXTERNC void spotrf_(char *uplo, int *n, float *a, int *lda, int *info);
-TH_EXTERNC void dpotri_(char *uplo, int *n, double *a, int *lda, int *info);
-TH_EXTERNC void spotri_(char *uplo, int *n, float *a, int *lda, int *info);
-TH_EXTERNC void dpotrs_(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);
-TH_EXTERNC void spotrs_(char *uplo, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info);
-TH_EXTERNC void sgeqrf_(int *m, int *n, float *a, int *lda, float *tau, float *work, int *lwork, int *info);
-TH_EXTERNC void dgeqrf_(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info);
-TH_EXTERNC void sorgqr_(int *m, int *n, int *k, float *a, int *lda, float *tau, float *work, int *lwork, int *info);
-TH_EXTERNC void dorgqr_(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info);
-TH_EXTERNC void sormqr_(char *side, char *trans, int *m, int *n, int *k, float *a, int *lda, float *tau, float *c, int *ldc, float *work, int *lwork, int *info);
-TH_EXTERNC void dormqr_(char *side, char *trans, int *m, int *n, int *k, double *a, int *lda, double *tau, double *c, int *ldc, double *work, int *lwork, int *info);
-TH_EXTERNC void spstrf_(char *uplo, int *n, float *a, int *lda, int *piv, int *rank, float *tol, float *work, int *info);
-TH_EXTERNC void dpstrf_(char *uplo, int *n, double *a, int *lda, int *piv, int *rank, double *tol, double *work, int *info);
-
-
-/* Compute the solution to a real system of linear equations A * X = B */
-void THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info);
-#else
- sgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info);
-#endif
-#else
- THError("gesv : Lapack library not found in compile time\n");
-#endif
- return;
-}
-
-/* Solve a triangular system of the form A * X = B or A^T * X = B */
-void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dtrtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);
-#else
- strtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);
-#endif
-#else
- THError("trtrs : Lapack library not found in compile time\n");
-#endif
- return;
-}
-
-/* Solve overdetermined or underdetermined real linear systems involving an
-M-by-N matrix A, or its transpose, using a QR or LQ factorization of A */
-void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info);
-#else
- sgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info);
-#endif
-#else
- THError("gels : Lapack library not found in compile time\n");
-#endif
-}
-
-/* Compute all eigenvalues and, optionally, eigenvectors of a real symmetric
-matrix A */
-void THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dsyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);
-#else
- ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);
-#endif
-#else
- THError("syev : Lapack library not found in compile time\n");
-#endif
-}
-
-/* Compute for an N-by-N real nonsymmetric matrix A, the eigenvalues and,
-optionally, the left and/or right eigenvectors */
-void THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info);
-#else
- sgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info);
-#endif
-#else
- THError("geev : Lapack library not found in compile time\n");
-#endif
-}
-
-/* Compute the singular value decomposition (SVD) of a real M-by-N matrix A,
-optionally computing the left and/or right singular vectors */
-void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgesvd_( &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info);
-#else
- sgesvd_( &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info);
-#endif
-#else
- THError("gesvd : Lapack library not found in compile time\n");
-#endif
-}
-
-/* LU decomposition */
-void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgetrf_(&m, &n, a, &lda, ipiv, info);
-#else
- sgetrf_(&m, &n, a, &lda, ipiv, info);
-#endif
-#else
- THError("getrf : Lapack library not found in compile time\n");
-#endif
-}
-
-void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);
-#else
- sgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);
-#endif
-#else
- THError("getrs : Lapack library not found in compile time\n");
-#endif
-}
-
-/* Matrix Inverse */
-void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgetri_(&n, a, &lda, ipiv, work, &lwork, info);
-#else
- sgetri_(&n, a, &lda, ipiv, work, &lwork, info);
-#endif
-#else
- THError("getri : Lapack library not found in compile time\n");
-#endif
-}
-
-/* Cholesky factorization */
-void THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dpotrf_(&uplo, &n, a, &lda, info);
-#else
- spotrf_(&uplo, &n, a, &lda, info);
-#endif
-#else
- THError("potrf : Lapack library not found in compile time\n");
-#endif
-}
-
-/* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */
-void THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dpotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info);
-#else
- spotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info);
-#endif
-#else
- THError("potrs: Lapack library not found in compile time\n");
-#endif
-}
-
-/* Cholesky factorization based Matrix Inverse */
-void THLapack_(potri)(char uplo, int n, real *a, int lda, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dpotri_(&uplo, &n, a, &lda, info);
-#else
- spotri_(&uplo, &n, a, &lda, info);
-#endif
-#else
- THError("potri: Lapack library not found in compile time\n");
-#endif
-}
-
-/* Cholesky factorization with complete pivoting */
-void THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dpstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info);
-#else
- spstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info);
-#endif
-#else
- THError("pstrf: Lapack library not found at compile time\n");
-#endif
-}
-
-/* QR decomposition */
-void THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info);
-#else
- sgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info);
-#endif
-#else
- THError("geqrf: Lapack library not found in compile time\n");
-#endif
-}
-
-/* Build Q from output of geqrf */
-void THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);
-#else
- sorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);
-#endif
-#else
- THError("orgqr: Lapack library not found in compile time\n");
-#endif
-}
-
-/* Multiply Q with a matrix using the output of geqrf */
-void THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info)
-{
-#ifdef USE_LAPACK
-#if defined(TH_REAL_IS_DOUBLE)
- dormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info);
-#else
- sormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info);
-#endif
-#else
- THError("ormqr: Lapack library not found in compile time\n");
-#endif
-}
-
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.h b/contrib/lua-torch/torch7/lib/TH/generic/THLapack.h
deleted file mode 100644
index b464dd2d2..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THLapack.h"
-#else
-
-/* AX=B */
-TH_API void THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info);
-/* Solve a triangular system of the form A * X = B or A^T * X = B */
-TH_API void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info);
-/* ||AX-B|| */
-TH_API void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info);
-/* Eigenvals */
-TH_API void THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info);
-/* Non-sym eigenvals */
-TH_API void THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info);
-/* svd */
-TH_API void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info);
-/* LU decomposition */
-TH_API void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info);
-TH_API void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info);
-/* Matrix Inverse */
-TH_API void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info);
-
-/* Positive Definite matrices */
-/* Cholesky factorization */
-void THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info);
-/* Matrix inverse based on Cholesky factorization */
-void THLapack_(potri)(char uplo, int n, real *a, int lda, int *info);
-/* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */
-void THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info);
-/* Cholesky factorization with complete pivoting. */
-void THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info);
-
-/* QR decomposition */
-void THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info);
-/* Build Q from output of geqrf */
-void THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info);
-/* Multiply Q with a matrix from output of geqrf */
-void THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.c b/contrib/lua-torch/torch7/lib/TH/generic/THStorage.c
deleted file mode 100644
index a592cfb62..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.c
+++ /dev/null
@@ -1,226 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THStorage.c"
-#else
-
-real* THStorage_(data)(const THStorage *self)
-{
- return self->data;
-}
-
-ptrdiff_t THStorage_(size)(const THStorage *self)
-{
- return self->size;
-}
-
-size_t THStorage_(elementSize)()
-{
- return sizeof(real);
-}
-
-THStorage* THStorage_(new)(void)
-{
- return THStorage_(newWithSize)(0);
-}
-
-THStorage* THStorage_(newWithSize)(ptrdiff_t size)
-{
- return THStorage_(newWithAllocator)(size, &THDefaultAllocator, NULL);
-}
-
-THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
- THAllocator *allocator,
- void *allocatorContext)
-{
- THStorage *storage = THAlloc(sizeof(THStorage));
- storage->data = allocator->malloc(allocatorContext, sizeof(real)*size);
- storage->size = size;
- storage->refcount = 1;
- storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
- storage->allocator = allocator;
- storage->allocatorContext = allocatorContext;
- return storage;
-}
-
-THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags)
-{
- THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags);
-
- THStorage *storage = THStorage_(newWithAllocator)(size,
- &THMapAllocator,
- ctx);
-
- if(size <= 0)
- storage->size = THMapAllocatorContext_size(ctx)/sizeof(real);
-
- THStorage_(clearFlag)(storage, TH_STORAGE_RESIZABLE);
-
- return storage;
-}
-
-THStorage* THStorage_(newWithSize1)(real data0)
-{
- THStorage *self = THStorage_(newWithSize)(1);
- self->data[0] = data0;
- return self;
-}
-
-THStorage* THStorage_(newWithSize2)(real data0, real data1)
-{
- THStorage *self = THStorage_(newWithSize)(2);
- self->data[0] = data0;
- self->data[1] = data1;
- return self;
-}
-
-THStorage* THStorage_(newWithSize3)(real data0, real data1, real data2)
-{
- THStorage *self = THStorage_(newWithSize)(3);
- self->data[0] = data0;
- self->data[1] = data1;
- self->data[2] = data2;
- return self;
-}
-
-THStorage* THStorage_(newWithSize4)(real data0, real data1, real data2, real data3)
-{
- THStorage *self = THStorage_(newWithSize)(4);
- self->data[0] = data0;
- self->data[1] = data1;
- self->data[2] = data2;
- self->data[3] = data3;
- return self;
-}
-
-void THStorage_(setFlag)(THStorage *storage, const char flag)
-{
- storage->flag |= flag;
-}
-
-void THStorage_(clearFlag)(THStorage *storage, const char flag)
-{
- storage->flag &= ~flag;
-}
-
-void THStorage_(retain)(THStorage *storage)
-{
- if(storage && (storage->flag & TH_STORAGE_REFCOUNTED))
- THAtomicIncrementRef(&storage->refcount);
-}
-
-void THStorage_(free)(THStorage *storage)
-{
- if(!storage)
- return;
-
- if((storage->flag & TH_STORAGE_REFCOUNTED) && (THAtomicGet(&storage->refcount) > 0))
- {
- if(THAtomicDecrementRef(&storage->refcount))
- {
- if(storage->flag & TH_STORAGE_FREEMEM) {
- storage->allocator->free(storage->allocatorContext, storage->data);
- }
- if(storage->flag & TH_STORAGE_VIEW) {
- THStorage_(free)(storage->view);
- }
- THFree(storage);
- }
- }
-}
-
-THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size)
-{
- return THStorage_(newWithDataAndAllocator)(data, size,
- &THDefaultAllocator, NULL);
-}
-
-THStorage* THStorage_(newWithDataAndAllocator)(real* data, ptrdiff_t size,
- THAllocator* allocator,
- void* allocatorContext) {
- THStorage *storage = THAlloc(sizeof(THStorage));
- storage->data = data;
- storage->size = size;
- storage->refcount = 1;
- storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
- storage->allocator = allocator;
- storage->allocatorContext = allocatorContext;
- return storage;
-}
-
-void THStorage_(resize)(THStorage *storage, ptrdiff_t size)
-{
- if(storage->flag & TH_STORAGE_RESIZABLE)
- {
- if(storage->allocator->realloc == NULL) {
- /* case when the allocator does not have a realloc defined */
- real *old_data = storage->data;
- ptrdiff_t old_size = storage->size;
- if (size == 0) {
- storage->data = NULL;
- } else {
- storage->data = storage->allocator->malloc(
- storage->allocatorContext,
- sizeof(real)*size);
- }
- storage->size = size;
- if (old_data != NULL) {
- ptrdiff_t copy_size = old_size;
- if (storage->size < copy_size) {
- copy_size = storage->size;
- }
- if (copy_size > 0) {
- memcpy(storage->data, old_data, sizeof(real)*copy_size);
- }
- storage->allocator->free(storage->allocatorContext, old_data);
- }
- } else {
- storage->data = storage->allocator->realloc(
- storage->allocatorContext,
- storage->data,
- sizeof(real)*size);
- storage->size = size;
- }
- } else {
- THError("Trying to resize storage that is not resizable");
- }
-}
-
-void THStorage_(fill)(THStorage *storage, real value)
-{
- ptrdiff_t i;
- for(i = 0; i < storage->size; i++)
- storage->data[i] = value;
-}
-
-void THStorage_(set)(THStorage *self, ptrdiff_t idx, real value)
-{
- THArgCheck((idx >= 0) && (idx < self->size), 2, "out of bounds");
- self->data[idx] = value;
-}
-
-real THStorage_(get)(const THStorage *self, ptrdiff_t idx)
-{
- THArgCheck((idx >= 0) && (idx < self->size), 2, "out of bounds");
- return self->data[idx];
-}
-
-void THStorage_(swap)(THStorage *storage1, THStorage *storage2)
-{
-#define SWAP(val) { val = storage1->val; storage1->val = storage2->val; storage2->val = val; }
- real *data;
- ptrdiff_t size;
- char flag;
- THAllocator *allocator;
- void *allocatorContext;
- struct THStorage *view;
-
- SWAP(data);
- SWAP(size);
- SWAP(flag);
- // don't swap refcount!
- SWAP(allocator);
- SWAP(allocatorContext);
- SWAP(view);
-#undef SWAP
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.h b/contrib/lua-torch/torch7/lib/TH/generic/THStorage.h
deleted file mode 100644
index 3dd214b33..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.h
+++ /dev/null
@@ -1,71 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THStorage.h"
-#else
-
-/* on pourrait avoir un liste chainee
- qui initialise math, lab structures (or more).
- mouais -- complique.
-
- Pb: THMapStorage is kind of a class
- THLab_()... comment je m'en sors?
-
- en template, faudrait que je les instancie toutes!!! oh boy!
- Et comment je sais que c'est pour Cuda? Le type float est le meme dans les <>
-
- au bout du compte, ca serait sur des pointeurs float/double... etc... = facile.
- primitives??
- */
-
-#define TH_STORAGE_REFCOUNTED 1
-#define TH_STORAGE_RESIZABLE 2
-#define TH_STORAGE_FREEMEM 4
-#define TH_STORAGE_VIEW 8
-
-typedef struct THStorage
-{
- real *data;
- ptrdiff_t size;
- int refcount;
- char flag;
- THAllocator *allocator;
- void *allocatorContext;
- struct THStorage *view;
-} THStorage;
-
-TH_API real* THStorage_(data)(const THStorage*);
-TH_API ptrdiff_t THStorage_(size)(const THStorage*);
-TH_API size_t THStorage_(elementSize)(void);
-
-/* slow access -- checks everything */
-TH_API void THStorage_(set)(THStorage*, ptrdiff_t, real);
-TH_API real THStorage_(get)(const THStorage*, ptrdiff_t);
-
-TH_API THStorage* THStorage_(new)(void);
-TH_API THStorage* THStorage_(newWithSize)(ptrdiff_t size);
-TH_API THStorage* THStorage_(newWithSize1)(real);
-TH_API THStorage* THStorage_(newWithSize2)(real, real);
-TH_API THStorage* THStorage_(newWithSize3)(real, real, real);
-TH_API THStorage* THStorage_(newWithSize4)(real, real, real, real);
-TH_API THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags);
-
-/* takes ownership of data */
-TH_API THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size);
-
-TH_API THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,
- THAllocator* allocator,
- void *allocatorContext);
-TH_API THStorage* THStorage_(newWithDataAndAllocator)(
- real* data, ptrdiff_t size, THAllocator* allocator, void *allocatorContext);
-
-/* should not differ with API */
-TH_API void THStorage_(setFlag)(THStorage *storage, const char flag);
-TH_API void THStorage_(clearFlag)(THStorage *storage, const char flag);
-TH_API void THStorage_(retain)(THStorage *storage);
-TH_API void THStorage_(swap)(THStorage *storage1, THStorage *storage2);
-
-/* might differ with other API (like CUDA) */
-TH_API void THStorage_(free)(THStorage *storage);
-TH_API void THStorage_(resize)(THStorage *storage, ptrdiff_t size);
-TH_API void THStorage_(fill)(THStorage *storage, real value);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c b/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c
deleted file mode 100644
index ce4b57eaf..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THStorageCopy.c"
-#else
-
-void THStorage_(rawCopy)(THStorage *storage, real *src)
-{
- ptrdiff_t i;
- for(i = 0; i < storage->size; i++)
- storage->data[i] = src[i];
-}
-
-void THStorage_(copy)(THStorage *storage, THStorage *src)
-{
- THArgCheck(storage->size == src->size, 2, "size mismatch");
- THStorage_(rawCopy)(storage, src->data);
-}
-
-#define IMPLEMENT_THStorage_COPY(TYPENAMESRC) \
-void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
-{ \
- ptrdiff_t i; \
- for(i = 0; i < storage->size; i++) \
- storage->data[i] = (real)src->data[i]; \
-}
-
-#define IMPLEMENT_THStorage_COPY_FROM_HALF(TYPENAMESRC) \
-void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
-{ \
- THArgCheck(storage->size == src->size, 2, "size mismatch"); \
- ptrdiff_t i; \
- for(i = 0; i < storage->size; i++) \
- storage->data[i] = (real)TH_half2float(src->data[i]); \
-}
-
-#define IMPLEMENT_THStorage_COPY_TO_HALF(TYPENAMESRC) \
-void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
-{ \
- THArgCheck(storage->size == src->size, 2, "size mismatch"); \
- ptrdiff_t i; \
- for(i = 0; i < storage->size; i++) \
- storage->data[i] = TH_float2half((float)(src->data[i])); \
-}
-
-#define IMPLEMENT_THStorage_COPY_TO_FROM_HALF(TYPENAMESRC) \
-void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
-{ \
- THArgCheck(storage->size == src->size, 2, "size mismatch"); \
- ptrdiff_t i; \
- for(i = 0; i < storage->size; i++) \
- storage->data[i] = src->data[i]; \
-}
-
-#ifndef TH_REAL_IS_HALF
-IMPLEMENT_THStorage_COPY(Byte)
-IMPLEMENT_THStorage_COPY(Char)
-IMPLEMENT_THStorage_COPY(Short)
-IMPLEMENT_THStorage_COPY(Int)
-IMPLEMENT_THStorage_COPY(Long)
-IMPLEMENT_THStorage_COPY(Float)
-IMPLEMENT_THStorage_COPY(Double)
-IMPLEMENT_THStorage_COPY_FROM_HALF(Half)
-#else
-/* only allow pass-through for Half */
-IMPLEMENT_THStorage_COPY_TO_FROM_HALF(Half)
-IMPLEMENT_THStorage_COPY_TO_HALF(Byte)
-IMPLEMENT_THStorage_COPY_TO_HALF(Char)
-IMPLEMENT_THStorage_COPY_TO_HALF(Short)
-IMPLEMENT_THStorage_COPY_TO_HALF(Int)
-IMPLEMENT_THStorage_COPY_TO_HALF(Long)
-IMPLEMENT_THStorage_COPY_TO_HALF(Float)
-IMPLEMENT_THStorage_COPY_TO_HALF(Double)
-#endif
-
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h b/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h
deleted file mode 100644
index ce8a2a690..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THStorageCopy.h"
-#else
-
-/* Support for copy between different Storage types */
-
-TH_API void THStorage_(rawCopy)(THStorage *storage, real *src);
-TH_API void THStorage_(copy)(THStorage *storage, THStorage *src);
-TH_API void THStorage_(copyByte)(THStorage *storage, struct THByteStorage *src);
-TH_API void THStorage_(copyChar)(THStorage *storage, struct THCharStorage *src);
-TH_API void THStorage_(copyShort)(THStorage *storage, struct THShortStorage *src);
-TH_API void THStorage_(copyInt)(THStorage *storage, struct THIntStorage *src);
-TH_API void THStorage_(copyLong)(THStorage *storage, struct THLongStorage *src);
-TH_API void THStorage_(copyFloat)(THStorage *storage, struct THFloatStorage *src);
-TH_API void THStorage_(copyDouble)(THStorage *storage, struct THDoubleStorage *src);
-TH_API void THStorage_(copyHalf)(THStorage *storage, struct THHalfStorage *src);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensor.c
deleted file mode 100644
index e44e06ec3..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.c
+++ /dev/null
@@ -1,939 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensor.c"
-#else
-
-/**** access methods ****/
-THStorage *THTensor_(storage)(const THTensor *self)
-{
- return self->storage;
-}
-
-ptrdiff_t THTensor_(storageOffset)(const THTensor *self)
-{
- return self->storageOffset;
-}
-
-int THTensor_(nDimension)(const THTensor *self)
-{
- return self->nDimension;
-}
-
-long THTensor_(size)(const THTensor *self, int dim)
-{
- THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "dimension %d out of range of %dD tensor",
- dim+TH_INDEX_BASE, THTensor_(nDimension)(self));
- return self->size[dim];
-}
-
-long THTensor_(stride)(const THTensor *self, int dim)
-{
- THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "dimension %d out of range of %dD tensor",
- dim+TH_INDEX_BASE, THTensor_(nDimension)(self));
- return self->stride[dim];
-}
-
-THLongStorage *THTensor_(newSizeOf)(THTensor *self)
-{
- THLongStorage *size = THLongStorage_newWithSize(self->nDimension);
- THLongStorage_rawCopy(size, self->size);
- return size;
-}
-
-THLongStorage *THTensor_(newStrideOf)(THTensor *self)
-{
- THLongStorage *stride = THLongStorage_newWithSize(self->nDimension);
- THLongStorage_rawCopy(stride, self->stride);
- return stride;
-}
-
-real *THTensor_(data)(const THTensor *self)
-{
- if(self->storage)
- return (self->storage->data+self->storageOffset);
- else
- return NULL;
-}
-
-void THTensor_(setFlag)(THTensor *self, const char flag)
-{
- self->flag |= flag;
-}
-
-void THTensor_(clearFlag)(THTensor *self, const char flag)
-{
- self->flag &= ~flag;
-}
-
-/**** creation methods ****/
-
-static void THTensor_(rawInit)(THTensor *self);
-
-
-/* Empty init */
-THTensor *THTensor_(new)(void)
-{
- THTensor *self = THAlloc(sizeof(THTensor));
- THTensor_(rawInit)(self);
- return self;
-}
-
-/* Pointer-copy init */
-THTensor *THTensor_(newWithTensor)(THTensor *tensor)
-{
- THTensor *self = THAlloc(sizeof(THTensor));
- THTensor_(rawInit)(self);
- THTensor_(setStorageNd)(self,
- tensor->storage,
- tensor->storageOffset,
- tensor->nDimension,
- tensor->size,
- tensor->stride);
- return self;
-}
-
-/* Storage init */
-THTensor *THTensor_(newWithStorage)(THStorage *storage, ptrdiff_t storageOffset, THLongStorage *size, THLongStorage *stride)
-{
- THTensor *self = THAlloc(sizeof(THTensor));
- if(size && stride)
- THArgCheck(size->size == stride->size, 4, "inconsistent size");
-
- THTensor_(rawInit)(self);
-#ifdef DEBUG
- THAssert((size ? size->size : (stride ? stride->size : 0)) <= INT_MAX);
-#endif
- THTensor_(setStorageNd)(self,
- storage,
- storageOffset,
- (size ? size->size : (stride ? stride->size : 0)),
- (size ? size->data : NULL),
- (stride ? stride->data : NULL));
-
- return self;
-}
-THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset,
- long size0, long stride0)
-{
- return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1);
-}
-
-THTensor *THTensor_(newWithStorage2d)(THStorage *storage, ptrdiff_t storageOffset,
- long size0, long stride0,
- long size1, long stride1)
-{
- return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1);
-}
-
-THTensor *THTensor_(newWithStorage3d)(THStorage *storage, ptrdiff_t storageOffset,
- long size0, long stride0,
- long size1, long stride1,
- long size2, long stride2)
-{
- return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1);
-}
-
-THTensor *THTensor_(newWithStorage4d)(THStorage *storage, ptrdiff_t storageOffset,
- long size0, long stride0,
- long size1, long stride1,
- long size2, long stride2,
- long size3, long stride3)
-{
- long size[4] = {size0, size1, size2, size3};
- long stride[4] = {stride0, stride1, stride2, stride3};
-
- THTensor *self = THAlloc(sizeof(THTensor));
- THTensor_(rawInit)(self);
- THTensor_(setStorageNd)(self, storage, storageOffset, 4, size, stride);
-
- return self;
-}
-
-THTensor *THTensor_(newWithSize)(THLongStorage *size, THLongStorage *stride)
-{
- return THTensor_(newWithStorage)(NULL, 0, size, stride);
-}
-
-THTensor *THTensor_(newWithSize1d)(long size0)
-{
- return THTensor_(newWithSize4d)(size0, -1, -1, -1);
-}
-
-THTensor *THTensor_(newWithSize2d)(long size0, long size1)
-{
- return THTensor_(newWithSize4d)(size0, size1, -1, -1);
-}
-
-THTensor *THTensor_(newWithSize3d)(long size0, long size1, long size2)
-{
- return THTensor_(newWithSize4d)(size0, size1, size2, -1);
-}
-
-THTensor *THTensor_(newWithSize4d)(long size0, long size1, long size2, long size3)
-{
- long size[4] = {size0, size1, size2, size3};
-
- THTensor *self = THAlloc(sizeof(THTensor));
- THTensor_(rawInit)(self);
- THTensor_(resizeNd)(self, 4, size, NULL);
-
- return self;
-}
-
-THTensor *THTensor_(newClone)(THTensor *self)
-{
- THTensor *tensor = THTensor_(new)();
- THTensor_(resizeAs)(tensor, self);
- THTensor_(copy)(tensor, self);
- return tensor;
-}
-
-THTensor *THTensor_(newContiguous)(THTensor *self)
-{
- if(!THTensor_(isContiguous)(self))
- return THTensor_(newClone)(self);
- else
- {
- THTensor_(retain)(self);
- return self;
- }
-}
-
-THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_)
-{
- THTensor *self = THTensor_(newWithTensor)(tensor);
- THTensor_(select)(self, NULL, dimension_, sliceIndex_);
- return self;
-}
-
-THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_)
-{
- THTensor *self = THTensor_(newWithTensor)(tensor);
- THTensor_(narrow)(self, NULL, dimension_, firstIndex_, size_);
- return self;
-}
-
-THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_)
-{
- THTensor *self = THTensor_(newWithTensor)(tensor);
- THTensor_(transpose)(self, NULL, dimension1_, dimension2_);
- return self;
-}
-
-THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_)
-{
- THTensor *self = THTensor_(newWithTensor)(tensor);
- THTensor_(unfold)(self, NULL, dimension_, size_, step_);
- return self;
-}
-
-THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size)
-{
- THArgCheck(THTensor_(isContiguous)(tensor), 1, "input is not contiguous");
- ptrdiff_t numel = THTensor_(nElement)(tensor);
- THTensor *self = THTensor_(new)();
- THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel);
- THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL);
- THLongStorage_free(inferred_size);
- return self;
-}
-
-/* Resize */
-void THTensor_(resize)(THTensor *self, THLongStorage *size, THLongStorage *stride)
-{
- THArgCheck(size != NULL, 2, "invalid size");
- if(stride)
- THArgCheck(stride->size == size->size, 3, "invalid stride");
-
-#ifdef DEBUG
- THAssert(size->size <= INT_MAX);
-#endif
- THTensor_(resizeNd)(self, size->size, size->data, (stride ? stride->data : NULL));
-}
-
-void THTensor_(resizeAs)(THTensor *self, THTensor *src)
-{
- if(!THTensor_(isSameSizeAs)(self, src))
- THTensor_(resizeNd)(self, src->nDimension, src->size, NULL);
-}
-
-void THTensor_(resize1d)(THTensor *tensor, long size0)
-{
- THTensor_(resize4d)(tensor, size0, -1, -1, -1);
-}
-
-void THTensor_(resize2d)(THTensor *tensor, long size0, long size1)
-{
- THTensor_(resize4d)(tensor, size0, size1, -1, -1);
-}
-
-void THTensor_(resize3d)(THTensor *tensor, long size0, long size1, long size2)
-{
- THTensor_(resize4d)(tensor, size0, size1, size2, -1);
-}
-
-void THTensor_(resize4d)(THTensor *self, long size0, long size1, long size2, long size3)
-{
- long size[4] = {size0, size1, size2, size3};
-
- THTensor_(resizeNd)(self, 4, size, NULL);
-}
-
-void THTensor_(resize5d)(THTensor *self, long size0, long size1, long size2, long size3, long size4)
-{
- long size[5] = {size0, size1, size2, size3, size4};
-
- THTensor_(resizeNd)(self, 5, size, NULL);
-}
-
-THTensor* THTensor_(newExpand)(THTensor *tensor, THLongStorage *sizes) {
- THTensor *result = THTensor_(new)();
- THTensor_(expand)(result, tensor, sizes);
- return result;
-}
-
-void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *sizes) {
- THArgCheck(THTensor_(nDimension)(tensor) > 0, 0, "can't expand an empty tensor");
- THArgCheck(THLongStorage_size(sizes) >= THTensor_(nDimension)(tensor), 1,
- "the number of sizes provided must be greater or equal to the "
- "number of dimensions in the tensor");
-
- long *expandedSizes;
- long *expandedStrides;
- char error_buffer[1024];
- int ret =
- THLongStorage_inferExpandGeometry(tensor->size, tensor->stride, THTensor_(nDimension)(tensor),
- sizes, &expandedSizes, &expandedStrides, error_buffer, 1024);
-
- if (ret != 0) {
- THError(error_buffer);
- return;
- }
-
- THTensor_(setStorageNd)(r, THTensor_(storage)(tensor), THTensor_(storageOffset)(tensor),
- THLongStorage_size(sizes), expandedSizes, expandedStrides);
- THFree(expandedSizes);
- THFree(expandedStrides);
-}
-
-
-void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count) {
- for (int i = 0; i < count; ++i) {
- THArgCheck(THTensor_(nDimension)(ops[i]) > 0, i, "can't expand empty tensor %d", i);
- }
-
- long *op_sizes[count];
- long op_dims[count];
-
- for (int i = 0; i < count; ++i) {
- op_sizes[i] = ops[i]->size;
- op_dims[i] = ops[i]->nDimension;
- }
-
- THLongStorage *sizes = THLongStorage_new();
- char error_buffer[1024];
- int ret = THLongStorage_inferSizeN(sizes,
- count,
- op_sizes,
- op_dims,
- error_buffer,
- 1024);
-
- if(ret != 0) {
- THLongStorage_free(sizes);
- THError(error_buffer);
- return;
- }
-
- for (int i = 0; i < count; ++i) {
- THTensor_(expand)(rets[i], ops[i], sizes);
- }
-
- THLongStorage_free(sizes);
-}
-
-void THTensor_(set)(THTensor *self, THTensor *src)
-{
- if(self != src)
- THTensor_(setStorageNd)(self,
- src->storage,
- src->storageOffset,
- src->nDimension,
- src->size,
- src->stride);
-}
-
-void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_)
-{
- if(size_ && stride_)
- THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes");
-
-#ifdef DEBUG
- THAssert((size_ ? size_->size : (stride_ ? stride_->size : 0)) <= INT_MAX);
-#endif
- THTensor_(setStorageNd)(self,
- storage_,
- storageOffset_,
- (size_ ? size_->size : (stride_ ? stride_->size : 0)),
- (size_ ? size_->data : NULL),
- (stride_ ? stride_->data : NULL));
-}
-
-void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_)
-{
- THTensor_(setStorage4d)(self, storage_, storageOffset_,
- size0_, stride0_,
- -1, -1,
- -1, -1,
- -1, -1);
-}
-
-void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_)
-{
- THTensor_(setStorage4d)(self, storage_, storageOffset_,
- size0_, stride0_,
- size1_, stride1_,
- -1, -1,
- -1, -1);
-}
-
-void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_,
- long size2_, long stride2_)
-{
- THTensor_(setStorage4d)(self, storage_, storageOffset_,
- size0_, stride0_,
- size1_, stride1_,
- size2_, stride2_,
- -1, -1);
-}
-
-void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_,
- long size2_, long stride2_,
- long size3_, long stride3_)
-{
-
- long size[4] = {size0_, size1_, size2_, size3_};
- long stride[4] = {stride0_, stride1_, stride2_, stride3_};
-
- THTensor_(setStorageNd)(self, storage_, storageOffset_, 4, size, stride);
-}
-
-
-void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension, long firstIndex, long size)
-{
- if(!src)
- src = self;
-
- THArgCheck( (dimension >= 0) && (dimension < src->nDimension), 2, "out of range");
- THArgCheck( (firstIndex >= 0) && (firstIndex < src->size[dimension]), 3, "out of range");
- THArgCheck( (size > 0) && (firstIndex <= src->size[dimension] - size), 4, "out of range");
-
- THTensor_(set)(self, src);
-
- if(firstIndex > 0)
- self->storageOffset += firstIndex*self->stride[dimension];
-
- self->size[dimension] = size;
-}
-
-void THTensor_(select)(THTensor *self, THTensor *src, int dimension, long sliceIndex)
-{
- int d;
-
- if(!src)
- src = self;
-
- THArgCheck(src->nDimension > 1, 1, "cannot select on a vector");
- THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range");
- THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 3, "out of range");
-
- THTensor_(set)(self, src);
- THTensor_(narrow)(self, NULL, dimension, sliceIndex, 1);
- for(d = dimension; d < self->nDimension-1; d++)
- {
- self->size[d] = self->size[d+1];
- self->stride[d] = self->stride[d+1];
- }
- self->nDimension--;
-}
-
-void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1, int dimension2)
-{
- long z;
-
- if(!src)
- src = self;
-
- THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range");
- THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range");
-
- THTensor_(set)(self, src);
-
- if(dimension1 == dimension2)
- return;
-
- z = self->stride[dimension1];
- self->stride[dimension1] = self->stride[dimension2];
- self->stride[dimension2] = z;
- z = self->size[dimension1];
- self->size[dimension1] = self->size[dimension2];
- self->size[dimension2] = z;
-}
-
-void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension, long size, long step)
-{
- long *newSize;
- long *newStride;
- int d;
-
- if(!src)
- src = self;
-
- THArgCheck( (src->nDimension > 0), 1, "cannot unfold an empty tensor");
- THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range");
- THArgCheck(size <= src->size[dimension], 3, "out of range");
- THArgCheck(step > 0, 4, "invalid step");
-
- THTensor_(set)(self, src);
-
- newSize = THAlloc(sizeof(long)*(self->nDimension+1));
- newStride = THAlloc(sizeof(long)*(self->nDimension+1));
-
- newSize[self->nDimension] = size;
- newStride[self->nDimension] = self->stride[dimension];
- for(d = 0; d < self->nDimension; d++)
- {
- if(d == dimension)
- {
- newSize[d] = (self->size[d] - size) / step + 1;
- newStride[d] = step*self->stride[d];
- }
- else
- {
- newSize[d] = self->size[d];
- newStride[d] = self->stride[d];
- }
- }
-
- THFree(self->size);
- THFree(self->stride);
-
- self->size = newSize;
- self->stride = newStride;
- self->nDimension++;
-}
-
-/* we have to handle the case where the result is a number */
-void THTensor_(squeeze)(THTensor *self, THTensor *src)
-{
- int ndim = 0;
- int d;
-
- if(!src)
- src = self;
-
- THTensor_(set)(self, src);
-
- for(d = 0; d < src->nDimension; d++)
- {
- if(src->size[d] != 1)
- {
- if(d != ndim)
- {
- self->size[ndim] = src->size[d];
- self->stride[ndim] = src->stride[d];
- }
- ndim++;
- }
- }
-
- /* right now, we do not handle 0-dimension tensors */
- if(ndim == 0 && src->nDimension > 0)
- {
- self->size[0] = 1;
- self->stride[0] = 1;
- ndim = 1;
- }
- self->nDimension = ndim;
-}
-
-void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension)
-{
- int d;
-
- if(!src)
- src = self;
-
- THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "dimension out of range");
-
- THTensor_(set)(self, src);
-
- if(src->size[dimension] == 1 && src->nDimension > 1)
- {
- for(d = dimension; d < self->nDimension-1; d++)
- {
- self->size[d] = self->size[d+1];
- self->stride[d] = self->stride[d+1];
- }
- self->nDimension--;
- }
-}
-
-void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension)
-{
- int d;
-
- if(!src)
- src = self;
-
- THArgCheck((dimension >= 0) && (dimension <= src->nDimension), 2, "dimension out of range");
- THArgCheck(src->nDimension > 0, 2, "cannot unsqueeze empty tensor");
-
- THTensor_(set)(self, src);
-
- self->size = (long*)THRealloc(self->size, sizeof(long)*(self->nDimension+1));
- self->stride = (long*)THRealloc(self->stride, sizeof(long)*(self->nDimension+1));
- self->nDimension++;
- for (d = self->nDimension-1; d > dimension; d--) {
- self->size[d] = self->size[d-1];
- self->stride[d] = self->stride[d-1];
- }
- if (dimension+1 < self->nDimension) {
- self->stride[dimension] = self->size[dimension+1] * self->stride[dimension+1];
- } else {
- self->stride[dimension] = 1;
- }
- self->size[dimension] = 1;
-}
-
-int THTensor_(isTransposed)(const THTensor *self)
-{
- if (THTensor_(isContiguous)(self)) {
- return 0;
- }
- long max_stride = 1;
- long size_max_stride = 1;
- long z = 1;
- int d;
- for (d = 0; d < self->nDimension; ++d) {
- if (self->stride[d] == 0 && self->size[d] != 1)
- return 0;
- if (self->stride[d] > max_stride) {
- max_stride = self->stride[d];
- size_max_stride = self->size[d];
- }
- z *= self->size[d];
- }
- if (z == max_stride * size_max_stride) {
- return 1;
- }
- return 0;
-}
-
-int THTensor_(isContiguous)(const THTensor *self)
-{
- long z = 1;
- int d;
- for(d = self->nDimension-1; d >= 0; d--)
- {
- if(self->size[d] != 1)
- {
- if(self->stride[d] == z)
- z *= self->size[d];
- else
- return 0;
- }
- }
- return 1;
-}
-
-int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims)
-{
- int d;
- if (self->nDimension != dims->size)
- return 0;
-
- for(d = 0; d < self->nDimension; ++d)
- {
- if(self->size[d] != dims->data[d])
- return 0;
- }
- return 1;
-}
-
-int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor* src)
-{
- int d;
- if (self->nDimension != src->nDimension)
- return 0;
- for(d = 0; d < self->nDimension; ++d)
- {
- if(self->size[d] != src->size[d])
- return 0;
- }
- return 1;
-}
-
-int THTensor_(isSetTo)(const THTensor *self, const THTensor* src)
-{
- if (!self->storage)
- return 0;
- if (self->storage == src->storage &&
- self->storageOffset == src->storageOffset &&
- self->nDimension == src->nDimension)
- {
- int d;
- for (d = 0; d < self->nDimension; ++d)
- {
- if (self->size[d] != src->size[d] || self->stride[d] != src->stride[d])
- return 0;
- }
- return 1;
- }
- return 0;
-}
-
-ptrdiff_t THTensor_(nElement)(const THTensor *self)
-{
- if(self->nDimension == 0)
- return 0;
- else
- {
- ptrdiff_t nElement = 1;
- int d;
- for(d = 0; d < self->nDimension; d++)
- nElement *= self->size[d];
- return nElement;
- }
-}
-
-void THTensor_(retain)(THTensor *self)
-{
- if(self->flag & TH_TENSOR_REFCOUNTED)
- THAtomicIncrementRef(&self->refcount);
-}
-
-void THTensor_(free)(THTensor *self)
-{
- if(!self)
- return;
-
- if(self->flag & TH_TENSOR_REFCOUNTED)
- {
- if(THAtomicDecrementRef(&self->refcount))
- {
- THFree(self->size);
- THFree(self->stride);
- if(self->storage)
- THStorage_(free)(self->storage);
- THFree(self);
- }
- }
-}
-
-void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst)
-{
- if(self != dst)
- THTensor_(copy)(dst, self);
-
- THTensor_(free)(self);
-}
-
-/*******************************************************************************/
-
-static void THTensor_(rawInit)(THTensor *self)
-{
- self->refcount = 1;
- self->storage = NULL;
- self->storageOffset = 0;
- self->size = NULL;
- self->stride = NULL;
- self->nDimension = 0;
- self->flag = TH_TENSOR_REFCOUNTED;
-}
-
-void THTensor_(setStorageNd)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)
-{
- /* storage */
- if(self->storage != storage)
- {
- if(self->storage)
- THStorage_(free)(self->storage);
-
- if(storage)
- {
- self->storage = storage;
- THStorage_(retain)(self->storage);
- }
- else
- self->storage = NULL;
- }
-
- /* storageOffset */
- if(storageOffset < 0)
- THError("Tensor: invalid storage offset");
- self->storageOffset = storageOffset;
-
- /* size and stride */
- THTensor_(resizeNd)(self, nDimension, size, stride);
-}
-
-void THTensor_(resizeNd)(THTensor *self, int nDimension, long *size, long *stride)
-{
- int d;
- int nDimension_;
- ptrdiff_t totalSize;
- int hascorrectsize = 1;
-
- nDimension_ = 0;
- for(d = 0; d < nDimension; d++)
- {
- if(size[d] > 0)
- {
- nDimension_++;
- if((self->nDimension > d) && (size[d] != self->size[d]))
- hascorrectsize = 0;
-
- if((self->nDimension > d) && stride && (stride[d] >= 0) && (stride[d] != self->stride[d]))
- hascorrectsize = 0;
- }
- else
- break;
- }
- nDimension = nDimension_;
-
- if(nDimension != self->nDimension)
- hascorrectsize = 0;
-
- if(hascorrectsize)
- return;
-
- if(nDimension > 0)
- {
- if(nDimension != self->nDimension)
- {
- self->size = THRealloc(self->size, sizeof(long)*nDimension);
- self->stride = THRealloc(self->stride, sizeof(long)*nDimension);
- self->nDimension = nDimension;
- }
-
- totalSize = 1;
- for(d = self->nDimension-1; d >= 0; d--)
- {
- self->size[d] = size[d];
- if(stride && (stride[d] >= 0) )
- self->stride[d] = stride[d];
- else
- {
- if(d == self->nDimension-1)
- self->stride[d] = 1;
- else
- self->stride[d] = self->size[d+1]*self->stride[d+1];
- }
- totalSize += (self->size[d]-1)*self->stride[d];
- }
-
- if(totalSize+self->storageOffset > 0)
- {
- if(!self->storage)
- self->storage = THStorage_(new)();
- if(totalSize+self->storageOffset > self->storage->size)
- THStorage_(resize)(self->storage, totalSize+self->storageOffset);
- }
- }
- else
- self->nDimension = 0;
-}
-
-void THTensor_(set1d)(THTensor *tensor, long x0, real value)
-{
- THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension");
- THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range");
- THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value);
-}
-
-real THTensor_(get1d)(const THTensor *tensor, long x0)
-{
- THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension");
- THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range");
- return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]);
-}
-
-void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value)
-{
- THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions");
- THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range");
- THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value);
-}
-
-real THTensor_(get2d)(const THTensor *tensor, long x0, long x1)
-{
- THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions");
- THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range");
- return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]);
-}
-
-void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value)
-{
- THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions");
- THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range");
- THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value);
-}
-
-real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2)
-{
- THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions");
- THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range");
- return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]);
-}
-
-void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value)
-{
- THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions");
- THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range");
- THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value);
-}
-
-real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3)
-{
- THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions");
- THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range");
- return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]);
-}
-
-THDescBuff THTensor_(desc)(const THTensor *tensor) {
- const int L = TH_DESC_BUFF_LEN;
- THDescBuff buf;
- char *str = buf.str;
- int n = 0;
-#define _stringify(x) #x
- n += snprintf(str, L-n, "torch." _stringify(x) "Tensor of size ");
-#undef _stringify
- int i;
- for(i = 0; i < tensor->nDimension; i++) {
- if(n >= L) break;
- n += snprintf(str+n, L-n, "%ld", tensor->size[i]);
- if(i < tensor->nDimension-1) {
- n += snprintf(str+n, L-n, "x");
- }
- }
- if(n >= L) {
- snprintf(str+L-4, 4, "...");
- }
- return buf;
-}
-
-THDescBuff THTensor_(sizeDesc)(const THTensor *tensor) {
- THLongStorage *size = THTensor_(newSizeOf)((THTensor*)tensor);
- THDescBuff buf = THLongStorage_sizeDesc(size);
- THLongStorage_free(size);
- return buf;
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensor.h
deleted file mode 100644
index 9fb246c85..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.h
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensor.h"
-#else
-
-/* a la lua? dim, storageoffset, ... et les methodes ? */
-
-#define TH_TENSOR_REFCOUNTED 1
-
-typedef struct THTensor
-{
- long *size;
- long *stride;
- int nDimension;
-
- THStorage *storage;
- ptrdiff_t storageOffset;
- int refcount;
-
- char flag;
-
-} THTensor;
-
-
-/**** access methods ****/
-TH_API THStorage* THTensor_(storage)(const THTensor *self);
-TH_API ptrdiff_t THTensor_(storageOffset)(const THTensor *self);
-TH_API int THTensor_(nDimension)(const THTensor *self);
-TH_API long THTensor_(size)(const THTensor *self, int dim);
-TH_API long THTensor_(stride)(const THTensor *self, int dim);
-TH_API THLongStorage *THTensor_(newSizeOf)(THTensor *self);
-TH_API THLongStorage *THTensor_(newStrideOf)(THTensor *self);
-TH_API real *THTensor_(data)(const THTensor *self);
-
-TH_API void THTensor_(setFlag)(THTensor *self, const char flag);
-TH_API void THTensor_(clearFlag)(THTensor *self, const char flag);
-
-
-/**** creation methods ****/
-TH_API THTensor *THTensor_(new)(void);
-TH_API THTensor *THTensor_(newWithTensor)(THTensor *tensor);
-/* stride might be NULL */
-TH_API THTensor *THTensor_(newWithStorage)(THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);
-TH_API THTensor *THTensor_(newWithStorage1d)(THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_);
-TH_API THTensor *THTensor_(newWithStorage2d)(THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_);
-TH_API THTensor *THTensor_(newWithStorage3d)(THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_,
- long size2_, long stride2_);
-TH_API THTensor *THTensor_(newWithStorage4d)(THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_,
- long size2_, long stride2_,
- long size3_, long stride3_);
-
-/* stride might be NULL */
-TH_API THTensor *THTensor_(newWithSize)(THLongStorage *size_, THLongStorage *stride_);
-TH_API THTensor *THTensor_(newWithSize1d)(long size0_);
-TH_API THTensor *THTensor_(newWithSize2d)(long size0_, long size1_);
-TH_API THTensor *THTensor_(newWithSize3d)(long size0_, long size1_, long size2_);
-TH_API THTensor *THTensor_(newWithSize4d)(long size0_, long size1_, long size2_, long size3_);
-
-TH_API THTensor *THTensor_(newClone)(THTensor *self);
-TH_API THTensor *THTensor_(newContiguous)(THTensor *tensor);
-TH_API THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_);
-TH_API THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_);
-TH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_);
-TH_API THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_);
-TH_API THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size);
-TH_API THTensor *THTensor_(newExpand)(THTensor *tensor, THLongStorage *size);
-
-TH_API void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *size);
-TH_API void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count);
-
-TH_API void THTensor_(resize)(THTensor *tensor, THLongStorage *size, THLongStorage *stride);
-TH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src);
-TH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, long *size, long *stride);
-TH_API void THTensor_(resize1d)(THTensor *tensor, long size0_);
-TH_API void THTensor_(resize2d)(THTensor *tensor, long size0_, long size1_);
-TH_API void THTensor_(resize3d)(THTensor *tensor, long size0_, long size1_, long size2_);
-TH_API void THTensor_(resize4d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_);
-TH_API void THTensor_(resize5d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_);
-
-TH_API void THTensor_(set)(THTensor *self, THTensor *src);
-TH_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);
-TH_API void THTensor_(setStorageNd)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, int nDimension, long *size, long *stride);
-TH_API void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_);
-TH_API void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_);
-TH_API void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_,
- long size2_, long stride2_);
-TH_API void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,
- long size0_, long stride0_,
- long size1_, long stride1_,
- long size2_, long stride2_,
- long size3_, long stride3_);
-
-TH_API void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension_, long firstIndex_, long size_);
-TH_API void THTensor_(select)(THTensor *self, THTensor *src, int dimension_, long sliceIndex_);
-TH_API void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1_, int dimension2_);
-TH_API void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension_, long size_, long step_);
-
-TH_API void THTensor_(squeeze)(THTensor *self, THTensor *src);
-TH_API void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension_);
-TH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_);
-
-TH_API int THTensor_(isContiguous)(const THTensor *self);
-TH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src);
-TH_API int THTensor_(isSetTo)(const THTensor *self, const THTensor *src);
-TH_API int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims);
-TH_API ptrdiff_t THTensor_(nElement)(const THTensor *self);
-
-TH_API void THTensor_(retain)(THTensor *self);
-TH_API void THTensor_(free)(THTensor *self);
-TH_API void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst);
-
-/* Slow access methods [check everything] */
-TH_API void THTensor_(set1d)(THTensor *tensor, long x0, real value);
-TH_API void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value);
-TH_API void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value);
-TH_API void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value);
-
-TH_API real THTensor_(get1d)(const THTensor *tensor, long x0);
-TH_API real THTensor_(get2d)(const THTensor *tensor, long x0, long x1);
-TH_API real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2);
-TH_API real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3);
-
-/* Debug methods */
-TH_API THDescBuff THTensor_(desc)(const THTensor *tensor);
-TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c
deleted file mode 100644
index 684ff9db5..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c
+++ /dev/null
@@ -1,1957 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorConv.c"
-#else
-
-/*
- 2D Input, 2D kernel : convolve given image with the given kernel.
-*/
-void THTensor_(validXCorr2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc)
-{
- long or = (ir - kr) / sr + 1;
- long oc = (ic - kc) / sc + 1;
-
- long xx, yy, kx, ky;
-
- if ((sc != 1) || (oc < 4)) {
- /* regular convolution */
- for(yy = 0; yy < or; yy++) {
- for(xx = 0; xx < oc; xx++) {
- /* Dot product in two dimensions... (between input image and the mask) */
- real *pi_ = t_ + yy*sr*ic + xx*sc;
- real *pw_ = k_;
- real sum = 0;
- for(ky = 0; ky < kr; ky++) {
- for(kx = 0; kx < kc; kx++) {
- sum += pi_[kx]*pw_[kx];
- }
- pi_ += ic; /* next input line */
- pw_ += kc; /* next mask line */
- }
- /* Update output */
- *r_++ += alpha*sum;
- }
- }
-
- } else {
- /* SSE-based convolution */
- for(yy = 0; yy < or; yy++) {
- real *pi_ = t_ + yy*sr*ic;
- real *pw_ = k_;
- for (ky = 0; ky < kr; ky++) {
- real *pis_ = pi_;
- for (kx = 0; kx < kc; kx++) {
- THVector_(cadd)(r_, r_, pis_, alpha*pw_[kx], oc);
- pis_++;
- }
- pi_ += ic; /* next input line */
- pw_ += kc; /* next mask line */
- }
- r_ += oc;
- }
- }
-}
-
-/*
- 2D Input, 2D kernel : convolve given image with the given kernel.
-*/
-void THTensor_(validConv2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc)
-{
- long or = (ir - kr) / sr + 1;
- long oc = (ic - kc) / sc + 1;
-
- long xx, yy, kx, ky;
-
- if ((sc != 1) || (oc < 4)) {
- /* regular convolution */
- for(yy = 0; yy < or; yy++) {
- for(xx = 0; xx < oc; xx++) {
- /* Dot product in two dimensions... (between input image and the mask) */
- real *pi_ = t_ + yy*sr*ic + xx*sc;
- real *pw_ = k_ + kr*kc - 1;
- real sum = 0;
- for(ky = 0; ky < kr; ky++) {
- for(kx = 0; kx < kc; kx++) {
- sum += pi_[kx]*pw_[-kx];
- }
- pi_ += ic; /* next input line */
- pw_ -= kc; /* next mask line */
- }
- /* Update output */
- *r_++ += alpha*sum;
- }
- }
-
- } else {
- /* SSE-based convolution */
- for(yy = 0; yy < or; yy++) {
- real *pw_ = k_ + kr*kc - 1;
- real *pi_ = t_ + yy*sr*ic;
- for (ky = 0; ky < kr; ky++) {
- real *pis_ = pi_;
- for (kx = 0; kx < kc; kx++) {
- THVector_(cadd)(r_, r_, pis_, alpha*pw_[-kx], oc);
- pis_++;
- }
- pi_ += ic; /* next input line */
- pw_ -= kc; /* next mask line */
- }
- r_ += oc;
- }
- }
-}
-
-/*
- 2D Input, 2D kernel : convolve given image with the given kernel, full convolution.
-*/
-void THTensor_(fullConv2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc)
-{
- long oc = (ic - 1) * sc + kc;
-
- long xx, yy, kx, ky;
-
- if ((sc != 1) || (ic < 4)) {
- /* regular convolution */
- for(yy = 0; yy < ir; yy++) {
- for(xx = 0; xx < ic; xx++) {
- /* Outer product in two dimensions... (between input image and the mask) */
- real *po_ = r_ + yy*sr*oc + xx*sc;
- real *pw_ = k_;
- for(ky = 0; ky < kr; ky++)
- {
- real z = *t_ * alpha;
- for(kx = 0; kx < kc; kx++) {
- po_[kx] += z * pw_[kx];
- }
- po_ += oc; /* next input line */
- pw_ += kc; /* next mask line */
- }
- t_++;
- }
- }
-
- } else {
- /* SSE-based convolution */
- for(yy = 0; yy < ir; yy++) {
- real *po_ = r_ + yy*sr*oc;
- real *pw_ = k_;
- for (ky = 0; ky < kr; ky++) {
- real *pos_ = po_;
- for (kx = 0; kx < kc; kx++) {
- THVector_(cadd)(pos_, pos_, t_, alpha*pw_[kx], ic);
- pos_++;
- }
- po_ += oc; /* next input line */
- pw_ += kc; /* next mask line */
- }
- t_ += ic;
- }
- }
-}
-
-/*
- 2D Input, 2D kernel : convolve given image with the given kernel, full convolution.
-*/
-void THTensor_(fullXCorr2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc)
-{
- long oc = (ic - 1) * sc + kc;
-
- long xx, yy, kx, ky;
-
- if ((sc != 1) || (ic < 4)) {
- /* regular convolution */
- for(yy = 0; yy < ir; yy++) {
- for(xx = 0; xx < ic; xx++) {
- /* Outer product in two dimensions... (between input image and the mask) */
- real *po_ = r_ + yy*sr*oc + xx*sc;
- real *pw_ = k_ + kr*kc -1;
- long kx, ky;
- for(ky = 0; ky < kr; ky++)
- {
- real z = *t_ * alpha;
- for(kx = 0; kx < kc; kx++) {
- po_[kx] += z * pw_[-kx];
- }
- po_ += oc; /* next input line */
- pw_ -= kc; /* next mask line */
- }
- t_++;
- }
- }
-
- } else {
- /* SSE-based convolution */
- for(yy = 0; yy < ir; yy++) {
- real *po_ = r_ + yy*sr*oc;
- real *pw_ = k_ + kr*kc -1;
- for (ky = 0; ky < kr; ky++) {
- real *pos_ = po_;
- for (kx = 0; kx < kc; kx++) {
- THVector_(cadd)(pos_, pos_, t_, pw_[-kx]*alpha, ic);
- pos_++;
- }
- po_ += oc; /* next input line */
- pw_ -= kc; /* next mask line */
- }
- t_ += ic;
- }
- }
-}
-
-/*
- 2D Input, 2D kernel : convolve given image with the given kernel, valid convolution.
- for sr,sc=1 this is equivalent to validXCorr2Dptr, but otherwise it is useful for
- calculating derivatives wrt a kernel that is applied with stride sr,sc != 1
-*/
-void THTensor_(validXCorr2DRevptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc)
-{
- long or = ir - (kr - 1) * sr;
- long oc = ic - (kc - 1) * sc;
-
- long xx, yy, kx, ky;
-
- if ((sc != 1) || (kc < 4)) {
- /* regular convolution */
- for(yy = 0; yy < kr; yy++) {
- for(xx = 0; xx < kc; xx++) {
- real *po_ = r_;
- real *pi_ = t_ + yy*sr*ic + xx*sc;
- real z = *k_++ * alpha;
-
- for(ky = 0; ky < or; ky++) {
- for(kx = 0; kx < oc; kx++)
- po_[kx] += z * pi_[kx];
- pi_ += ic;
- po_ += oc;
- }
- }
- }
-
- } else {
- /* SSE-based convolution */
- for(yy = 0; yy < kr; yy++) {
- for(xx = 0; xx < kc; xx++) {
- real *po_ = r_;
- real *pi_ = t_ + yy*sr*ic + xx*sc;
- real z = *k_++ * alpha;
-
- for(ky = 0; ky < or; ky++) {
- THVector_(cadd)(po_, po_, pi_, z, oc);
- pi_ += ic;
- po_ += oc;
- }
- }
- }
- }
-}
-/*
- 3D Input, 3D kernel : convolve given volume with the given kernel.
-*/
-void THTensor_(validXCorr3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc)
-{
- long ot = (it - kt) / st + 1;
- long or = (ir - kr) / sr + 1;
- long oc = (ic - kc) / sc + 1;
-
- long zz, xx, yy;
-
- for (zz = 0; zz < ot; zz++)
- {
- for(yy = 0; yy < or; yy++)
- {
- for(xx = 0; xx < oc; xx++)
- {
- /* Dot product in two dimensions... (between input image and the mask) */
- real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
- real *pw_ = k_;
- real sum = 0;
- long kz, kx, ky;
- for(kz = 0; kz < kt; kz++)
- {
- for(ky = 0; ky < kr; ky++)
- {
- for(kx = 0; kx < kc; kx++) {
- sum += pi_[kx]*pw_[kx];
- }
- pi_ += ic; /* next input line */
- pw_ += kc; /* next mask line */
- }
- pi_ += (ir-kr)*ic; /* next input slice */
- }
- /* Update output */
- *r_++ += sum*alpha;
- }
- }
- }
-}
-
-/*
- 3D Input, 3D kernel : convolve given volume with the given kernel.
-*/
-void THTensor_(validConv3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc)
-{
- long ot = (it - kt) / st + 1;
- long or = (ir - kr) / sr + 1;
- long oc = (ic - kc) / sc + 1;
-
- long zz, xx, yy;
-
- for(zz = 0; zz < ot; zz++)
- {
- for(yy = 0; yy < or; yy++)
- {
- for(xx = 0; xx < oc; xx++)
- {
- /* Dot product in two dimensions... (between input image and the mask) */
- real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
- real *pw_ = k_ + kt*kr*kc - 1;
- real sum = 0;
- long kz, kx, ky;
- for(kz = 0; kz < kt; kz++)
- {
- for(ky = 0; ky < kr; ky++)
- {
- for(kx = 0; kx < kc; kx++) {
- sum += pi_[kx]*pw_[-kx];
- }
- pi_ += ic; /* next input line */
- pw_ -= kc; /* next mask line */
- }
- pi_ += (ir-kr)*ic; /* next input slice */
- }
- /* Update output */
- *r_++ += alpha*sum;
- }
- }
- }
-}
-
-
-/*
- 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution.
-*/
-void THTensor_(fullConv3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc)
-{
- long or = (ir - 1) * sr + kr;
- long oc = (ic - 1) * sc + kc;
-
- long zz, xx, yy;
-
- for(zz = 0; zz < it; zz++)
- {
- for(yy = 0; yy < ir; yy++)
- {
- for(xx = 0; xx < ic; xx++)
- {
- /* Outer product in two dimensions... (between input image and the mask) */
- real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc;
- real *pw_ = k_;
- long kz, kx, ky;
- /* printf("Output Plane : %ld,%ld,%ld, input val=%g\n",zz,yy,xx,*t_); */
- for(kz = 0; kz < kt; kz++)
- {
- for(ky = 0; ky < kr; ky++)
- {
- real z = *t_ * alpha;
- for(kx = 0; kx < kc; kx++) {
- /* printf("o=%g,k=%g," , po_[kx],pw_[kx]); */
- po_[kx] += z * pw_[kx];
- /* printf("o=%g " , po_[kx]); */
- }
- /* printf("\n"); */
- po_ += oc; /* next input line */
- pw_ += kc; /* next mask line */
- }
- po_ += (or-kr)*oc; /* next output slice */
- /* printf("\n"); */
- }
- t_++;
- }
- }
- }
-}
-
-/*
- 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution.
-*/
-void THTensor_(fullXCorr3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc)
-{
- long or = (ir - 1) * sr + kr;
- long oc = (ic - 1) * sc + kc;
-
- long zz, xx, yy;
-
- for(zz = 0; zz < it; zz++)
- {
- for(yy = 0; yy < ir; yy++)
- {
- for(xx = 0; xx < ic; xx++)
- {
- /* Outer product in two dimensions... (between input image and the mask) */
- real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc;
- real *pw_ = k_ + kt*kr*kc -1;
- long kz, kx, ky;
- for(kz = 0; kz < kt; kz++)
- {
- for(ky = 0; ky < kr; ky++)
- {
- real z = *t_ * alpha;
- for(kx = 0; kx < kc; kx++) {
- po_[kx] += z * pw_[-kx];
- }
- po_ += oc; /* next input line */
- pw_ -= kc; /* next mask line */
- }
- po_ += (or-kr)*oc; /* next output slice */
- }
- t_++;
- }
- }
- }
-}
-
-/*
- 3D Input, 3D kernel : convolve given image with the given kernel, valid convolution.
- for sr,sc=1 this is equivalent to validXCorr3Dptr, but otherwise it is useful for
- calculating derivatives wrt a kernel that is applied with stride sr,sc != 1
-*/
-void THTensor_(validXCorr3DRevptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc)
-{
- long ot = it - (kt - 1) * st;
- long or = ir - (kr - 1) * sr;
- long oc = ic - (kc - 1) * sc;
-
- long zz, xx, yy;
- for(zz = 0; zz < kt; zz++)
- {
- for(yy = 0; yy < kr; yy++)
- {
- for(xx = 0; xx < kc; xx++)
- {
- real *po_ = r_;
- real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;
- real z = *k_++ * alpha;
- long kz, kx, ky;
- for(kz = 0; kz < ot; kz++)
- {
- for(ky = 0; ky < or; ky++)
- {
- for(kx = 0; kx < oc; kx++)
- po_[kx] += z * pi_[kx];
- pi_ += ic;
- po_ += oc;
- }
- pi_ += (ir-or)*ic; /* next input slice */
- }
- }
- }
- }
-}
-
-void THTensor_(conv2d)(real* output_data,
- real alpha,
- real* ptr_input, long nInputRows, long nInputCols,
- real* ptr_weight, long nKernelRows, long nKernelCols,
- long srow, long scol,
- const char *vf, const char *xc)
-{
- THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'");
- if (*vf == 'F')
- if (*xc == 'X')
- THTensor_(fullXCorr2Dptr)(output_data,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(fullConv2Dptr)(output_data,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- if (*xc == 'X')
- THTensor_(validXCorr2Dptr)(output_data,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(validConv2Dptr)(output_data,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
-}
-
-void THTensor_(conv3d)(real* output_data,
- real alpha,
- real* ptr_input, long nInputDepth, long nInputRows, long nInputCols,
- real* ptr_weight, long nKernelDepth, long nKernelRows, long nKernelCols,
- long sdepth, long srow, long scol,
- const char *vf, const char *xc)
-{
- THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'");
- if (*vf == 'F')
- if (*xc == 'X')
- THTensor_(fullXCorr3Dptr)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol);
- else
- THTensor_(fullConv3Dptr)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol);
- else
- if (*xc == 'X')
- THTensor_(validXCorr3Dptr)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol);
- else
- THTensor_(validConv3Dptr)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol);
-}
-
-long THTensor_(convsize)(long x, long k, long s, const char* vf)
-{
- THArgCheck(*vf == 'V' || *vf == 'F', 1, "type of convolution can be 'V' or 'F'");
- if (*vf == 'V')
- return (x-k)/s + 1;
- else
- return (x-1)*s + k;
-}
-
-
-/*
- 3D input, 3D kernel, 4D output
- like rank1 update
- A <- xx' + beta*A
- for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for
- calculating derivatives wrt a kernel that is applied with stride sr,sc != 1
-*/
-void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol)
-{
- long nInputPlane, nInputRows, nInputCols;
- long nKernelPlane, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputRows, nOutputCols;
- long istride0, kstride0;
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- nInputPlane = input->size[0];
- istride0 = input->stride[0];
- nInputRows = input->size[1];
- nInputCols = input->size[2];
-
- kstride0 = kernel->stride[0];
- nKernelPlane = kernel->size[0];
- nKernelRows = kernel->size[1];
- nKernelCols = kernel->size[2];
- nOutputPlane = nInputPlane * kernel->size[0];
-
- THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "covn2DRevger : Input image is smaller than kernel");
-
- nOutputRows = nInputRows - (nKernelRows - 1) * srow;
- nOutputCols = nInputCols - (nKernelCols - 1) * scol;
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- /*THTensor_(zero)(r_);*/
-
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]*r_->size[1]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] = 0.0;
- }
- }
- else if (beta != 1)
- {
- /*THTensor_(mul)(r_, beta);*/
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]*r_->size[1]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] *= beta;
- }
- }
-
-#pragma omp parallel for private(k)
- for(k = 0; k < nKernelPlane; k++)
- {
- long i;
- /* get kernel */
- real *ptr_weight = weight_data+k*kstride0;
-
- for(i = 0; i < nInputPlane; i++)
- {
- /* get output */
- real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;
- /* get input */
- real *ptr_input = input_data+i*istride0;
-
- /* do image, kernel convolution */
- THTensor_(validXCorr2DRevptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- /* Next output plane */
- /* output_data += nOutputCols*nOutputRows; */
- }
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-
-/*
- 3D input, 3D kernel, 4D output
- like rank1 update
- A <- xx' + beta*A
- for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for
- calculating derivatives wrt a kernel that is applied with stride sr,sc != 1
-*/
-void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol)
-{
- long nbatch, nInputPlane, nInputRows, nInputCols;
- long nKernelPlane, nKernelRows, nKernelCols;
- long nOutputRows, nOutputCols;
- long istride0, kstride0, istride1, kstride1;
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- istride0 = input->stride[0];
- istride1 = input->stride[1];
- nbatch = input->size[0];
- nInputPlane = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- kstride1 = kernel->stride[1];
- nKernelPlane = kernel->size[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
-
- THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv2DRevger : Input image is smaller than kernel");
- THArgCheck(kernel->size[0] == input->size[0] , 2, "conv2DRevger : Input batch and kernel batch is not same size");
-
- nOutputRows = nInputRows - (nKernelRows - 1) * srow;
- nOutputCols = nInputCols - (nKernelCols - 1) * scol;
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- /*THTensor_(zero)(r_);*/
-
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]*r_->size[1]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] = 0.0;
- }
- }
- else if (beta != 1)
- {
- /*THTensor_(mul)(r_, beta);*/
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]*r_->size[1]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] *= beta;
- }
- }
-
-#pragma omp parallel for private(k)
- for(k = 0; k < nKernelPlane; k++)
- {
- long i;
- for(i = 0; i < nInputPlane; i++)
- {
- long p;
- for(p = 0; p < nbatch; p++)
- {
- /* get kernel */
- real *ptr_weight = weight_data + p*kstride0 + k*kstride1;
- /* get output */
- real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;
- /* get input */
- real *ptr_input = input_data + p*istride0 + i*istride1;
-
- /* do image, kernel convolution */
- THTensor_(validXCorr2DRevptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- /* Next output plane */
- /* output_data += nOutputCols*nOutputRows; */
- }
- }
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-
-/*
- 3D input, 3D kernel, 4D output
- like rank1 update
- A <- xx' + beta*A
-*/
-void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputRows, nInputCols;
- long nKernelPlane, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputRows, nOutputCols;
- long istride0, kstride0;
-
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- nInputPlane = input->size[0];
- istride0 = input->stride[0];
- nInputRows = input->size[1];
- nInputCols = input->size[2];
-
- kstride0 = kernel->stride[0];
- nKernelPlane = kernel->size[0];
- nKernelRows = kernel->size[1];
- nKernelCols = kernel->size[2];
- nOutputPlane = nInputPlane * kernel->size[0];
-
- THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dger : Input image is smaller than kernel");
-
- if (*vf == 'F') {
- nOutputRows = (nInputRows - 1) * srow + nKernelRows;
- nOutputCols = (nInputCols - 1) * scol + nKernelCols;
- } else { /* valid */
- nOutputRows = (nInputRows - nKernelRows) / srow + 1;
- nOutputCols = (nInputCols - nKernelCols) / scol + 1;
- }
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- /*THTensor_(zero)(r_);*/
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]*r_->size[1]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] = 0.0;
- }
- }
- else if (beta != 1)
- {
- /*THTensor_(mul)(r_, beta);*/
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]*r_->size[1]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] *= beta;
- }
- }
-
-#pragma omp parallel for private(k)
- for(k = 0; k < nKernelPlane; k++)
- {
- long i;
- /* get kernel */
- real *ptr_weight = weight_data+k*kstride0;
-
- for(i = 0; i < nInputPlane; i++)
- {
- /* get output */
- real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;
- /* get input */
- real *ptr_input = input_data+i*istride0;
-
- /* do image, kernel convolution */
- if (*vf == 'F')
- if (*xc == 'X')
- THTensor_(fullXCorr2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(fullConv2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- if (*xc == 'X')
- THTensor_(validXCorr2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(validConv2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- /* Next output plane */
- /* output_data += nOutputCols*nOutputRows; */
- }
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-
-/*
- 3D input, 4D kernel, 3D output
- matrix vector product like
- y <- Ax + beta*y
-*/
-void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputRows, nInputCols;
- long nKernelRows, nKernelCols;
- long nOutputPlane, nOutputRows, nOutputCols;
- long istride0, kstride0, kstride1;
- THTensor *input;
- THTensor* kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) {
- kernel = THTensor_(newContiguous)(k_);
- } else {
- THTensor_(retain)(k_);
- kernel = k_;
- }
-
- nInputPlane = input->size[0];
- istride0 = input->stride[0];
- nInputRows = input->size[1];
- nInputCols = input->size[2];
-
- kstride0 = kernel->stride[0];
- kstride1 = kernel->stride[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
- nOutputPlane = kernel->size[0];
- THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes");
-
- THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel");
-
- if (*vf == 'F') {
- nOutputRows = (nInputRows - 1) * srow + nKernelRows;
- nOutputCols = (nInputCols - 1) * scol + nKernelCols;
- } else { /* valid */
- nOutputRows = (nInputRows - nKernelRows) / srow + 1;
- nOutputCols = (nInputCols - nKernelCols) / scol + 1;
- }
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- /*THTensor_(zero)(r_);*/
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] = 0.0;
- }
- }
- else if (beta != 1)
- {
- /*THTensor_(mul)(r_, beta);*/
-#pragma omp parallel for private(k)
- for (k = 0; k < r_->size[0]; k++)
- {
- real* ptr_output = output_data + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] *= beta;
- }
- }
-
-#pragma omp parallel for private(k)
- for(k = 0; k < nOutputPlane; k++)
- {
- long i;
- /* get output */
- real *ptr_output = output_data + k*nOutputCols*nOutputRows;
- for(i = 0; i < nInputPlane; i++)
- {
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0 + i*kstride1;
- /* get input */
- real *ptr_input = input_data + i*istride0;
-
- /* do image, kernel convolution */
- if (*vf == 'F')
- if (*xc == 'X')
- THTensor_(fullXCorr2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(fullConv2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- if (*xc == 'X')
- THTensor_(validXCorr2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(validConv2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- }
- /* Next output plane */
- /* output_data += nOutputCols*nOutputRows;*/
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-
-/*
- 3D input, 4D kernel, 3D output
- matrix vector product like
- y <- Ax + beta*y
-*/
-void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputRows, nInputCols;
- long nKernelRows, nKernelCols;
- long nOutputPlane, nOutputRows, nOutputCols;
- long kstride0, kstride1;
- THTensor *input;
- THTensor* kernel;
- long nbatch;
- ptrdiff_t nelem;
- real *input_data;
- real *weight_data;
- real *output_data;
- long p;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) {
- kernel = THTensor_(newContiguous)(k_);
- } else {
- THTensor_(retain)(k_);
- kernel = k_;
- }
-
- nbatch = input->size[0];
- nInputPlane = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- kstride1 = kernel->stride[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
- nOutputPlane = kernel->size[0];
- THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes");
-
- THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel");
-
- if (*vf == 'F') {
- nOutputRows = (nInputRows - 1) * srow + nKernelRows;
- nOutputCols = (nInputCols - 1) * scol + nKernelCols;
- } else { /* valid */
- nOutputRows = (nInputRows - nKernelRows) / srow + 1;
- nOutputCols = (nInputCols - nKernelCols) / scol + 1;
- }
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- /*THTensor_(zero)(r_);*/
-#pragma omp parallel for private(p)
- for (p=0; p < r_->size[0]; p++)
- {
- long k;
- for (k = 0; k < r_->size[1]; k++)
- {
- real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] = 0.0;
- }
- }
- }
- else if (beta != 1)
- {
- /*THTensor_(mul)(r_, beta);*/
-#pragma omp parallel for private(p)
- for(p=0; p < r_->size[0]; p++)
- {
- long k;
- for (k = 0; k < r_->size[1]; k++)
- {
- real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
- long l;
- for (l = 0; l < nOutputRows*nOutputCols; l++)
- ptr_output[l] *= beta;
- }
- }
- }
-
-#pragma omp parallel for private(p)
- for(p=0; p < nbatch; p++)
- {
- long k;
- for(k = 0; k < nOutputPlane; k++)
- {
- long i;
- /* get output */
- real *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows;
- for(i = 0; i < nInputPlane; i++)
- {
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0 + i*kstride1;
- /* get input */
- real *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols;
-
- /* do image, kernel convolution */
- if (*vf == 'F')
- if (*xc == 'X')
- THTensor_(fullXCorr2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(fullConv2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- if (*xc == 'X')
- THTensor_(validXCorr2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- else
- THTensor_(validConv2Dptr)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol);
- }
- /* Next output plane */
- /* output_data += nOutputCols*nOutputRows;*/
- }
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-
-/*
- 2D input, 2D kernel, 2D output
- scalar multiplication like
- y <- x*y + beta*y
-*/
-void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)
-{
- THTensor *input;
- THTensor* kernel;
- long nInputRows;
- long nInputCols;
- long nKernelRows;
- long nKernelCols;
- long nOutputRows, nOutputCols;
- real *ptr_input;
- real *ptr_weight;
- real *output_data;
- ptrdiff_t nelem;
-
- THArgCheck(t_->nDimension == 2 , 3, "input: 2D Tensor expected");
- THArgCheck(k_->nDimension == 2 , 4, "kernel: 2D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- nInputRows = input->size[0];
- nInputCols = input->size[1];
- nKernelRows = kernel->size[0];
- nKernelCols = kernel->size[1];
-
- THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmul : Input image is smaller than kernel");
-
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize2d)(r_, nOutputRows, nOutputCols);
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- THTensor_(zero)(r_);
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- ptr_input = THTensor_(data)(input);
- ptr_weight = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
-
- /* do image, kernel convolution */
- THTensor_(conv2d)(output_data,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol, vf, xc);
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 3D input, 3D kernel, 3D output
- component wise multiplication like
- y <- y.*x + beta*y
-*/
-void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputRows, nInputCols;
- long nKernelRows, nKernelCols;
- long nOutputPlane, nOutputRows, nOutputCols;
- long istride0, kstride0;
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- istride0 = input->stride[0];
- nInputPlane = input->size[0];
- nInputRows = input->size[1];
- nInputCols = input->size[2];
-
- kstride0 = kernel->stride[0];
- nOutputPlane = kernel->size[0];
- nKernelRows = kernel->size[1];
- nKernelCols = kernel->size[2];
-
- THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes");
- THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dcmul : Input image is smaller than kernel");
-
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- for(k = 0; k < nOutputPlane; k++)
- {
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0;
- /* get input */
- real *ptr_input = input_data + k*istride0;
-
- /* do image, kernel convolution */
- THTensor_(conv2d)(output_data,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol, vf, xc);
- /* Next output plane */
- output_data += nOutputCols*nOutputRows;
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 3D input, 3D kernel, 3D output
- component wise multiplication like with a permutation map
- y <- y.*x + beta*y
-*/
-void THTensor_(conv2Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputRows, nInputCols;
- long nKernelRows, nKernelCols;
- long nOutputPlane, nOutputRows, nOutputCols;
- long istride0, kstride0;
- THTensor *input;
- THTensor* kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- long nmaps;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected");
- THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected");
- THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- istride0 = input->stride[0];
- nInputPlane = input->size[0];
- nInputRows = input->size[1];
- nInputCols = input->size[2];
-
- kstride0 = kernel->stride[0];
- nOutputPlane = kernel->size[0];
- nKernelRows = kernel->size[1];
- nKernelCols = kernel->size[2];
-
- THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes");
- THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols)
- || *vf == 'F', 2, "conv2Dmap : Input image is smaller than kernel");
-
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- nmaps = map->size[0];
-
- for(k = 0; k < nmaps; k++)
- {
- /* get indices */
- long from = (long)THTensor_(get2d)(map,k,0)-1;
- long to = (long)THTensor_(get2d)(map,k,1)-1;
-
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0;
- /* get input */
- real *ptr_input = input_data + from*istride0;
- /* get output */
- real *ptr_output = output_data + to*nOutputRows*nOutputCols;
-
- /* do image, kernel convolution */
- THTensor_(conv2d)(ptr_output,
- alpha,
- ptr_input, nInputRows, nInputCols,
- ptr_weight, nKernelRows, nKernelCols,
- srow, scol, vf, xc);
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 4D input, 4D kernel, 5D output
- like rank1 update
- A <- xx' + beta*A
- for sr,sc=1 this is equivalent to xcorr2Dger, but otherwise it is useful for
- calculating derivatives wrt a kernel that is applied with stride sr,sc != 1
-*/
-void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,
- long sdepth, long srow, long scol)
-{
- long nInputPlane, nInputDepth, nInputRows, nInputCols;
- long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
- long istride0, kstride0;
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k, i;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
- THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
- THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- nInputPlane = input->size[0];
- istride0 = input->stride[0];
- nInputDepth = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- nKernelPlane = kernel->size[0];
- nKernelDepth= kernel->size[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
- nOutputPlane = nInputPlane * kernel->size[0];
-
- THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv3DRevger : Input image is smaller than kernel");
-
- nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth;
- nOutputRows = nInputRows - (nKernelRows - 1) * srow;
- nOutputCols = nInputCols - (nKernelCols - 1) * scol;
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- for(k = 0; k < nKernelPlane; k++)
- {
- /* get kernel */
- real *ptr_weight = weight_data+k*kstride0;
-
- for(i = 0; i < nInputPlane; i++)
- {
- /* get input */
- real *ptr_input = input_data+i*istride0;
-
- /* do image, kernel convolution */
- THTensor_(validXCorr3DRevptr)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol);
- /* Next output plane */
- output_data += nOutputDepth*nOutputCols*nOutputRows;
- }
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-
-/*
- 4D input, 4D kernel, 5D output
- like rank1 update
- A <- xx' + beta*A
-*/
-void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,
- long sdepth, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputDepth, nInputRows, nInputCols;
- long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
- long istride0, kstride0;
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k, i;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
- THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
- THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- nInputPlane = input->size[0];
- istride0 = input->stride[0];
- nInputDepth = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- nKernelPlane = kernel->size[0];
- nKernelDepth = kernel->size[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
- nOutputPlane = nInputPlane * kernel->size[0];
-
- THArgCheck((nInputDepth >= nKernelDepth
- && nInputRows >= nKernelRows
- && nInputCols >= nKernelCols)
- || *vf == 'F', 2, "conv3Dger : Input image is smaller than kernel");
-
- nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- for(k = 0; k < nKernelPlane; k++)
- {
- /* get kernel */
- real *ptr_weight = weight_data+k*kstride0;
-
- for(i = 0; i < nInputPlane; i++)
- {
- /* get input */
- real *ptr_input = input_data+i*istride0;
-
- /* do image, kernel convolution */
- THTensor_(conv3d)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol, vf, xc);
-
- /* Next output plane */
- output_data += nOutputDepth*nOutputCols*nOutputRows;
- }
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 4D input, 5D kernel, 4D output
- matrix vector product like
- y <- Ax + beta*y
-*/
-void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,
- long sdepth, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputDepth, nInputRows, nInputCols;
- long nKernelDepth, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
- long istride0, kstride0, kstride1;
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k, i;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
- THArgCheck(k_->nDimension == 5 , 4, "kernel: 5D Tensor expected");
- THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
- THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- if (!(k_->stride[4] == 1) || !(k_->stride[3] == k_->size[4])) {
- kernel = THTensor_(newContiguous)(k_);
- } else {
- THTensor_(retain)(k_);
- kernel = k_;
- }
-
- nInputPlane = input->size[0];
- istride0 = input->stride[0];
- nInputDepth = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- kstride1 = kernel->stride[1];
- nKernelDepth = kernel->size[2];
- nKernelRows = kernel->size[3];
- nKernelCols = kernel->size[4];
- nOutputPlane = kernel->size[0];
- THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes");
-
- THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmv : Input image is smaller than kernel");
-
- nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- for(k = 0; k < nOutputPlane; k++)
- {
- for(i = 0; i < nInputPlane; i++)
- {
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0 + i*kstride1;
- /* get input */
- real *ptr_input = input_data + i*istride0;
-
- /* do image, kernel convolution */
- THTensor_(conv3d)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol, vf, xc);
- }
- /* Next output plane */
- output_data += nOutputDepth*nOutputCols*nOutputRows;
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 3D input, 3D kernel, 3D output
- scalar multiplication like
- y <- x*y + beta*y
-*/
-void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,
- long sdepth, long srow, long scol, const char *vf, const char *xc)
-{
- THTensor *input;
- THTensor* kernel;
- long nInputDepth;
- long nInputRows;
- long nInputCols;
- long nKernelDepth;
- long nKernelRows;
- long nKernelCols;
- long nOutputDepth, nOutputRows, nOutputCols;
- real *ptr_input;
- real *ptr_weight;
- real *output_data;
- ptrdiff_t nelem;
-
- THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected");
- THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer");
- THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- nInputDepth = input->size[0];
- nInputRows = input->size[1];
- nInputCols = input->size[2];
- nKernelDepth = kernel->size[0];
- nKernelRows = kernel->size[1];
- nKernelCols = kernel->size[2];
-
- THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmul : Input image is smaller than kernel");
-
- nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols);
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- THTensor_(zero)(r_);
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- ptr_input = THTensor_(data)(input);
- ptr_weight = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
-
- /* do image, kernel convolution */
- THTensor_(conv3d)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol, vf, xc);
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 4D input, 4D kernel, 4D output
- component wise multiplication like
- y <- y.*x + beta*y
-*/
-void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,
- long sdepth, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputDepth, nInputRows, nInputCols;
- long nKernelDepth, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
- long istride0, kstride0;
-
- THTensor *input;
- THTensor *kernel;
- real *input_data;
- real *weight_data;
- real *output_data;
- ptrdiff_t nelem;
- long k;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 3D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 3D Tensor expected");
- THArgCheck(srow >= 1, 5, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 6, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- istride0 = input->stride[0];
- nInputPlane = input->size[0];
- nInputDepth = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- nOutputPlane = kernel->size[0];
- nKernelDepth = kernel->size[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
-
- THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes");
- THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dcmul : Input image is smaller than kernel");
-
- nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- for(k = 0; k < nOutputPlane; k++)
- {
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0;
- /* get input */
- real *ptr_input = input_data + k*istride0;
-
- /* do image, kernel convolution */
- THTensor_(conv3d)(output_data,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol, vf, xc);
-
- /* Next output plane */
- output_data += nOutputDepth*nOutputCols*nOutputRows;
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-
-/*
- 4D input, 4D kernel, 4D output
- component wise multiplication like with a permutation map
- y <- y.*x + beta*y
-*/
-void THTensor_(conv3Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map,
- long sdepth, long srow, long scol, const char *vf, const char *xc)
-{
- long nInputPlane, nInputDepth, nInputRows, nInputCols;
- long nKernelDepth, nKernelRows, nKernelCols;
- long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;
- long istride0, kstride0;
-
- THTensor *input;
- THTensor *kernel;
- ptrdiff_t nelem;
- real *input_data;
- real *weight_data;
- real *output_data;
- long nmaps;
- long k;
-
- THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected");
- THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected");
- THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected");
- THArgCheck(srow >= 1, 6, "Stride should be a positive integer");
- THArgCheck(scol >= 1, 7, "Stride should be a positive integer");
- THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'");
- THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'");
-
- input = THTensor_(newContiguous)(t_);
- kernel = THTensor_(newContiguous)(k_);
-
- istride0 = input->stride[0];
- nInputPlane = input->size[0];
- nInputDepth = input->size[1];
- nInputRows = input->size[2];
- nInputCols = input->size[3];
-
- kstride0 = kernel->stride[0];
- nOutputPlane = kernel->size[0];
- nKernelDepth = kernel->size[1];
- nKernelRows = kernel->size[2];
- nKernelCols = kernel->size[3];
-
- THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes");
- THArgCheck((nInputDepth >= nKernelDepth
- && nInputRows >= nKernelRows
- && nInputCols >= nKernelCols) || *vf == 'F',
- 2, "conv3Dmap : Input image is smaller than kernel");
-
- nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);
- nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);
- nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);
-
- nelem = THTensor_(nElement)(r_);
- THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);
-
- if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))
- {
- THTensor_(zero)(r_);
- }
- else if (beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- input_data = THTensor_(data)(input);
- weight_data = THTensor_(data)(kernel);
- output_data = THTensor_(data)(r_);
-
- nmaps = map->size[0];
-
- for(k = 0; k < nmaps; k++)
- {
- /* get indices */
- long from = (long)THTensor_(get2d)(map,k,0)-1;
- long to = (long)THTensor_(get2d)(map,k,1)-1;
-
- /* get kernel */
- real *ptr_weight = weight_data + k*kstride0;
- /* get input */
- real *ptr_input = input_data + from*istride0;
- /* get output */
- real *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols;
-
- /* do image, kernel convolution */
- THTensor_(conv3d)(ptr_output,
- alpha,
- ptr_input, nInputDepth, nInputRows, nInputCols,
- ptr_weight, nKernelDepth, nKernelRows, nKernelCols,
- sdepth, srow, scol, vf, xc);
- }
- THTensor_(free)(input);
- THTensor_(free)(kernel);
-}
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h
deleted file mode 100644
index 79866f390..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorConv.h"
-#else
-
-TH_API void THTensor_(validXCorr2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc);
-
-TH_API void THTensor_(validConv2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc);
-
-TH_API void THTensor_(fullXCorr2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc);
-
-TH_API void THTensor_(fullConv2Dptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc);
-
-TH_API void THTensor_(validXCorr2DRevptr)(real *r_,
- real alpha,
- real *t_, long ir, long ic,
- real *k_, long kr, long kc,
- long sr, long sc);
-
-TH_API void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol);
-TH_API void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol);
-TH_API void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc);
-
-TH_API void THTensor_(validXCorr3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc);
-
-TH_API void THTensor_(validConv3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc);
-
-TH_API void THTensor_(fullXCorr3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc);
-
-TH_API void THTensor_(fullConv3Dptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc);
-
-TH_API void THTensor_(validXCorr3DRevptr)(real *r_,
- real alpha,
- real *t_, long it, long ir, long ic,
- real *k_, long kt, long kr, long kc,
- long st, long sr, long sc);
-
-TH_API void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol);
-TH_API void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc);
-TH_API void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c
deleted file mode 100644
index d9cd1c0d5..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c
+++ /dev/null
@@ -1,136 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorCopy.c"
-#else
-
-int THTensor_(copyTransposeValid)(THTensor *tensor, THTensor *src) {
- const int MIN_SZ = 60 * 60;
- return THTensor_(isContiguous)(tensor) &&
- THTensor_(nDimension)(src) == 2 &&
- THTensor_(stride)(src, 0) == 1 &&
- THTensor_(stride)(src, 1) == THTensor_(size)(src, 0) &&
- THTensor_(nElement)(tensor) >= MIN_SZ;
-}
-
-// special case copy where tensor is contiguous and src is a transposed matrix
-// This can be generalized to most copies, but it's tricker
-void THTensor_(copyTranspose)(THTensor *tensor, THTensor *src) {
- #define MIN(x, y) (((x) < (y)) ? (x) : (y))
- #define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
-#ifdef TH_REAL_IS_BYTE
- const int BLOCK_SZ = 120;
-#else
- const int BLOCK_SZ = 60;
-#endif
-
- THTensor *buf = THTensor_(newWithSize2d)(BLOCK_SZ, BLOCK_SZ);
- real *sp = THTensor_(data)(src);
- real *rp = THTensor_(data)(tensor);
- real *bp = THTensor_(data)(buf);
-
- long NR = THTensor_(size)(src, 0);
- long NC = THTensor_(size)(src, 1);
- for (long R = 0; R < NR; R += BLOCK_SZ) {
- for (long C = 0; C < NC; C += BLOCK_SZ) {
- real *spo = sp + R + C * NR;
- real *rpo = rp + C + R * NC;
-
- int nr = MIN(NR - R, BLOCK_SZ);
- int nc = MIN(NC - C, BLOCK_SZ);
-
- // 1. copy columns from src to buf
- for (int c = 0; c < nc; c++) {
- memcpy(bp + c * BLOCK_SZ, spo + c * NR, nr * sizeof(real));
- }
-
- // 2. transpose buf in place
- int rc_max = MAX(nr, nc);
- int rc_min = MIN(nr, nc);
- for (int r = 0; r < rc_max; r++) {
- int end = MIN(r, rc_min);
- for (int c = 0; c < end; c++) {
- real tmp = bp[r + BLOCK_SZ * c];
- bp[r + BLOCK_SZ * c] = bp[r * BLOCK_SZ + c];
- bp[r * BLOCK_SZ + c] = tmp;
- }
- }
-
- // 3. copy rows from buf to dst
- for (int r = 0; r < nr; r++) {
- memcpy(rpo + r * NC, bp + r * BLOCK_SZ, nc * sizeof(real));
- }
- }
- }
- THTensor_(free)(buf);
- #undef MIN
- #undef MAX
-}
-
-void THTensor_(copy)(THTensor *tensor, THTensor *src)
-{
- if (tensor == src) return;
- if (THTensor_(isContiguous)(tensor) && THTensor_(isContiguous)(src) && THTensor_(nElement)(tensor) == THTensor_(nElement)(src)) {
- real *sp = THTensor_(data)(src);
- real *rp = THTensor_(data)(tensor);
- ptrdiff_t sz = THTensor_(nElement)(tensor);
-#ifndef TH_REAL_IS_HALF
- THVector_(copy)(rp, sp, sz);
-#else
- memcpy(rp, sp, sz * sizeof(real));
-#endif
-#ifndef TH_REAL_IS_HALF
- } else if (THTensor_(copyTransposeValid)(tensor, src)) {
- THTensor_(copyTranspose)(tensor, src);
-#endif
- } else {
- TH_TENSOR_APPLY2(real, tensor, real, src, *tensor_data = *src_data;)
- }
-}
-
-#define IMPLEMENT_THTensor_COPY(TYPENAMESRC, TYPE_SRC) \
-void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
-{ \
- TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)(*src_data);) \
-}
-
-#define IMPLEMENT_THTensor_COPY_TO_HALF(TYPENAMESRC, TYPE_SRC) \
-void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
-{ \
- TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = TH_float2half((float)*src_data);) \
-}
-
-#define IMPLEMENT_THTensor_COPY_FROM_HALF(TYPENAMESRC, TYPE_SRC) \
-void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
-{ \
- TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)TH_half2float(*src_data);) \
-}
-
-#define IMPLEMENT_THTensor_COPY_TO_FROM_HALF(TYPENAMESRC, TYPE_SRC) \
-void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
-{ \
- TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = *src_data;) \
-}
-
-#ifndef TH_REAL_IS_HALF
-IMPLEMENT_THTensor_COPY(Byte, unsigned char)
-IMPLEMENT_THTensor_COPY(Char, char)
-IMPLEMENT_THTensor_COPY(Short, short)
-IMPLEMENT_THTensor_COPY(Int, int)
-IMPLEMENT_THTensor_COPY(Long, long)
-IMPLEMENT_THTensor_COPY(Float, float)
-IMPLEMENT_THTensor_COPY(Double, double)
-IMPLEMENT_THTensor_COPY_FROM_HALF(Half, THHalf)
-#else
-/* only allow pass-through for Half */
-IMPLEMENT_THTensor_COPY_TO_FROM_HALF(Half, THHalf)
-IMPLEMENT_THTensor_COPY_TO_HALF(Byte, unsigned char)
-IMPLEMENT_THTensor_COPY_TO_HALF(Char, char)
-IMPLEMENT_THTensor_COPY_TO_HALF(Short, short)
-IMPLEMENT_THTensor_COPY_TO_HALF(Int, int)
-IMPLEMENT_THTensor_COPY_TO_HALF(Long, long)
-IMPLEMENT_THTensor_COPY_TO_HALF(Float, float)
-IMPLEMENT_THTensor_COPY_TO_HALF(Double, double)
-
-#endif /* REAL_IS_HALF */
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h
deleted file mode 100644
index b9e5bfc99..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorCopy.h"
-#else
-
-/* Support for copy between different Tensor types */
-
-TH_API void THTensor_(copy)(THTensor *tensor, THTensor *src);
-TH_API void THTensor_(copyByte)(THTensor *tensor, struct THByteTensor *src);
-TH_API void THTensor_(copyChar)(THTensor *tensor, struct THCharTensor *src);
-TH_API void THTensor_(copyShort)(THTensor *tensor, struct THShortTensor *src);
-TH_API void THTensor_(copyInt)(THTensor *tensor, struct THIntTensor *src);
-TH_API void THTensor_(copyLong)(THTensor *tensor, struct THLongTensor *src);
-TH_API void THTensor_(copyFloat)(THTensor *tensor, struct THFloatTensor *src);
-TH_API void THTensor_(copyDouble)(THTensor *tensor, struct THDoubleTensor *src);
-TH_API void THTensor_(copyHalf)(THTensor *tensor, struct THHalfTensor *src);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c
deleted file mode 100644
index d4e52f6d7..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c
+++ /dev/null
@@ -1,1121 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorLapack.c"
-#else
-
-/*
-Check if self is transpose of a contiguous matrix
-*/
-static int THTensor_(isTransposedContiguous)(THTensor *self)
-{
- return self->stride[0] == 1 && self->stride[1] == self->size[0];
-}
-/*
-If a matrix is a regular contiguous matrix, make sure it is transposed
-because this is what we return from Lapack calls.
-*/
-static void THTensor_(checkTransposed)(THTensor *self)
-{
- if(THTensor_(isContiguous)(self))
- THTensor_(transpose)(self, NULL, 0, 1);
- return;
-}
-/*
-newContiguous followed by transpose
-Similar to (newContiguous), but checks if the transpose of the matrix
-is contiguous and also limited to 2D matrices.
-*/
-static THTensor *THTensor_(newTransposedContiguous)(THTensor *self)
-{
- THTensor *tensor;
- if(THTensor_(isTransposedContiguous)(self))
- {
- THTensor_(retain)(self);
- tensor = self;
- }
- else
- {
- tensor = THTensor_(newContiguous)(self);
- THTensor_(transpose)(tensor, NULL, 0, 1);
- }
-
- return tensor;
-}
-
-/*
-Given the result tensor and src tensor, decide if the lapack call should use the
-provided result tensor or should allocate a new space to put the result in.
-
-The returned tensor have to be freed by the calling function.
-
-nrows is required, because some lapack calls, require output space smaller than
-input space, like underdetermined gels.
-*/
-static THTensor *THTensor_(checkLapackClone)(THTensor *result, THTensor *src, int nrows)
-{
- /* check if user wants to reuse src and if it is correct shape/size */
- if (src == result && THTensor_(isTransposedContiguous)(src) && src->size[1] == nrows)
- THTensor_(retain)(result);
- else if(src == result || result == NULL) /* in this case, user wants reuse of src, but its structure is not OK */
- result = THTensor_(new)();
- else
- THTensor_(retain)(result);
- return result;
-}
-
-/*
-Same as cloneColumnMajor, but accepts nrows argument, because some lapack calls require
-the resulting tensor to be larger than src.
-*/
-static THTensor *THTensor_(cloneColumnMajorNrows)(THTensor *self, THTensor *src, int nrows)
-{
- THTensor *result;
- THTensor *view;
-
- if (src == NULL)
- src = self;
- result = THTensor_(checkLapackClone)(self, src, nrows);
- if (src == result)
- return result;
-
- THTensor_(resize2d)(result, src->size[1], nrows);
- THTensor_(checkTransposed)(result);
-
- if (src->size[0] == nrows)
- THTensor_(copy)(result, src);
- else
- {
- view = THTensor_(newNarrow)(result, 0, 0, src->size[0]);
- THTensor_(copy)(view, src);
- THTensor_(free)(view);
- }
- return result;
-}
-
-/*
-Create a clone of src in self column major order for use with Lapack.
-If src == self, a new tensor is allocated, in any case, the return tensor should be
-freed by calling function.
-*/
-static THTensor *THTensor_(cloneColumnMajor)(THTensor *self, THTensor *src)
-{
- return THTensor_(cloneColumnMajorNrows)(self, src, src->size[0]);
-}
-
-void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a)
-{
- int free_b = 0;
- if (a == NULL) a = ra_;
- if (b == NULL) b = rb_;
- THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d",
- a->nDimension);
- THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 "
- "dimensions, but has %d", b->nDimension);
- THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld",
- a->size[0], a->size[1]);
- THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld "
- "rows, B has %ld", a->size[0], b->size[0]);
-
- if (b->nDimension == 1) {
- b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],
- b->stride[0], 1, 0);
- free_b = 1;
- }
-
- int n, nrhs, lda, ldb, info;
- THIntTensor *ipiv;
- THTensor *ra__; // working version of A matrix to be passed into lapack GELS
- THTensor *rb__; // working version of B matrix to be passed into lapack GELS
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
- rb__ = THTensor_(cloneColumnMajor)(rb_, b);
-
- n = (int)ra__->size[0];
- nrhs = (int)rb__->size[1];
- lda = n;
- ldb = n;
-
- ipiv = THIntTensor_newWithSize1d((long)n);
- THLapack_(gesv)(n, nrhs,
- THTensor_(data)(ra__), lda, THIntTensor_data(ipiv),
- THTensor_(data)(rb__), ldb, &info);
-
- THLapackCheckWithCleanup("Lapack Error in %s : U(%d,%d) is zero, singular U.",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(rb__);
- THIntTensor_free(ipiv);
- if (free_b) THTensor_(free)(b);),
- "gesv", info, info);
-
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(freeCopyTo)(rb__, rb_);
- THIntTensor_free(ipiv);
- if (free_b) THTensor_(free)(b);
-}
-
-void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a,
- const char *uplo, const char *trans, const char *diag)
-{
- int free_b = 0;
- if (a == NULL) a = ra_;
- if (b == NULL) b = rb_;
- THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d",
- a->nDimension);
- THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 "
- "dimensions, but has %d", b->nDimension);
- THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld",
- a->size[0], a->size[1]);
- THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld "
- "rows, B has %ld", a->size[0], b->size[0]);
-
- if (b->nDimension == 1) {
- b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],
- b->stride[0], 1, 0);
- free_b = 1;
- }
-
- int n, nrhs, lda, ldb, info;
- THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS
- THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
- rb__ = THTensor_(cloneColumnMajor)(rb_, b);
-
- n = (int)ra__->size[0];
- nrhs = (int)rb__->size[1];
- lda = n;
- ldb = n;
-
- THLapack_(trtrs)(uplo[0], trans[0], diag[0], n, nrhs,
- THTensor_(data)(ra__), lda,
- THTensor_(data)(rb__), ldb, &info);
-
-
- THLapackCheckWithCleanup("Lapack Error in %s : A(%d,%d) is zero, singular A",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(rb__);
- if (free_b) THTensor_(free)(b);),
- "trtrs", info, info);
-
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(freeCopyTo)(rb__, rb_);
- if (free_b) THTensor_(free)(b);
-}
-
-void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a)
-{
- int free_b = 0;
- // Note that a = NULL is interpreted as a = ra_, and b = NULL as b = rb_.
- if (a == NULL) a = ra_;
- if (b == NULL) b = rb_;
- THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d",
- a->nDimension);
- THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 "
- "dimensions, but has %d", b->nDimension);
- THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld "
- "rows, B has %ld", a->size[0], b->size[0]);
-
- if (b->nDimension == 1) {
- b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],
- b->stride[0], 1, 0);
- free_b = 1;
- }
-
- int m, n, nrhs, lda, ldb, info, lwork;
- THTensor *work = NULL;
- real wkopt = 0;
-
- THTensor *ra__ = NULL; // working version of A matrix to be passed into lapack GELS
- THTensor *rb__ = NULL; // working version of B matrix to be passed into lapack GELS
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- m = ra__->size[0];
- n = ra__->size[1];
- lda = m;
- ldb = (m > n) ? m : n;
-
- rb__ = THTensor_(cloneColumnMajorNrows)(rb_, b, ldb);
-
- nrhs = rb__->size[1];
- info = 0;
-
-
- /* get optimal workspace size */
- THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda,
- THTensor_(data)(rb__), ldb,
- &wkopt, -1, &info);
- lwork = (int)wkopt;
- work = THTensor_(newWithSize1d)(lwork);
- THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda,
- THTensor_(data)(rb__), ldb,
- THTensor_(data)(work), lwork, &info);
-
- THLapackCheckWithCleanup("Lapack Error in %s : The %d-th diagonal element of the triangular factor of A is zero",
- THCleanup(THTensor_(free)(ra__);
- THTensor_(free)(rb__);
- THTensor_(free)(work);
- if (free_b) THTensor_(free)(b);),
- "gels", info,"");
-
- /* rb__ is currently ldb by nrhs; resize it to n by nrhs */
- rb__->size[0] = n;
- if (rb__ != rb_)
- THTensor_(resize2d)(rb_, n, nrhs);
-
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(freeCopyTo)(rb__, rb_);
- THTensor_(free)(work);
- if (free_b) THTensor_(free)(b);
-}
-
-void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr)
-{
- int n, lda, lwork, info, ldvr;
- THTensor *work, *wi, *wr, *a;
- real wkopt;
- real *rv_data;
- long i;
-
- THTensor *re__ = NULL;
- THTensor *rv__ = NULL;
-
- THArgCheck(a_->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a_->size[0] == a_->size[1], 1,"A should be square");
-
- /* we want to definitely clone a_ for geev*/
- a = THTensor_(cloneColumnMajor)(NULL, a_);
-
- n = a->size[0];
- lda = n;
-
- wi = THTensor_(newWithSize1d)(n);
- wr = THTensor_(newWithSize1d)(n);
-
- rv_data = NULL;
- ldvr = 1;
- if (*jobvr == 'V')
- {
- THTensor_(resize2d)(rv_,n,n);
- /* guard against someone passing a correct size, but wrong stride */
- rv__ = THTensor_(newTransposedContiguous)(rv_);
- rv_data = THTensor_(data)(rv__);
- ldvr = n;
- }
- THTensor_(resize2d)(re_,n,2);
- re__ = THTensor_(newContiguous)(re_);
-
- /* get optimal workspace size */
- THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi),
- NULL, 1, rv_data, ldvr, &wkopt, -1, &info);
-
- lwork = (int)wkopt;
- work = THTensor_(newWithSize1d)(lwork);
-
- THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi),
- NULL, 1, rv_data, ldvr, THTensor_(data)(work), lwork, &info);
-
- THLapackCheckWithCleanup(" Lapack Error in %s : %d off-diagonal elements of an didn't converge to zero",
- THCleanup(THTensor_(free)(re__);
- THTensor_(free)(rv__);
- THTensor_(free)(a);
- THTensor_(free)(wi);
- THTensor_(free)(wr);
- THTensor_(free)(work);),
- "geev", info,"");
-
- {
- real *re_data = THTensor_(data)(re__);
- real *wi_data = THTensor_(data)(wi);
- real *wr_data = THTensor_(data)(wr);
- for (i=0; i<n; i++)
- {
- re_data[2*i] = wr_data[i];
- re_data[2*i+1] = wi_data[i];
- }
- }
-
- if (*jobvr == 'V')
- {
- THTensor_(checkTransposed)(rv_);
- THTensor_(freeCopyTo)(rv__, rv_);
- }
- THTensor_(freeCopyTo)(re__, re_);
- THTensor_(free)(a);
- THTensor_(free)(wi);
- THTensor_(free)(wr);
- THTensor_(free)(work);
-}
-
-void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a, const char *jobz, const char *uplo)
-{
- if (a == NULL) a = rv_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1,"A should be square");
-
- int n, lda, lwork, info;
- THTensor *work;
- real wkopt;
-
- THTensor *rv__ = NULL;
- THTensor *re__ = NULL;
-
- rv__ = THTensor_(cloneColumnMajor)(rv_, a);
-
- n = rv__->size[0];
- lda = n;
-
- THTensor_(resize1d)(re_,n);
- re__ = THTensor_(newContiguous)(re_);
-
- /* get optimal workspace size */
- THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda,
- THTensor_(data)(re_), &wkopt, -1, &info);
- lwork = (int)wkopt;
- work = THTensor_(newWithSize1d)(lwork);
- THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda,
- THTensor_(data)(re_), THTensor_(data)(work), lwork, &info);
-
- THLapackCheckWithCleanup("Lapack Error %s : %d off-diagonal elements didn't converge to zero",
- THCleanup(THTensor_(free)(rv__);
- THTensor_(free)(re__);
- THTensor_(free)(work);),
- "syev", info,"");
-
- THTensor_(freeCopyTo)(rv__, rv_);
- THTensor_(freeCopyTo)(re__, re_);
- THTensor_(free)(work);
-}
-
-void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char* jobu)
-{
- THTensor *ra_ = THTensor_(new)();
- THTensor_(gesvd2)(ru_, rs_, rv_, ra_, a, jobu);
- THTensor_(free)(ra_);
-}
-
-void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char* jobu)
-{
- if (a == NULL) a = ra_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
-
- int k,m, n, lda, ldu, ldvt, lwork, info;
- THTensor *work;
- THTensor *rvf_ = THTensor_(new)();
- real wkopt;
-
- THTensor *ra__ = NULL;
- THTensor *ru__ = NULL;
- THTensor *rs__ = NULL;
- THTensor *rv__ = NULL;
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- m = ra__->size[0];
- n = ra__->size[1];
- k = (m < n ? m : n);
-
- lda = m;
- ldu = m;
- ldvt = n;
-
- THTensor_(resize1d)(rs_,k);
- THTensor_(resize2d)(rvf_,ldvt,n);
- if (*jobu == 'A')
- THTensor_(resize2d)(ru_,m,ldu);
- else
- THTensor_(resize2d)(ru_,k,ldu);
-
- THTensor_(checkTransposed)(ru_);
-
- /* guard against someone passing a correct size, but wrong stride */
- ru__ = THTensor_(newTransposedContiguous)(ru_);
- rs__ = THTensor_(newContiguous)(rs_);
- rv__ = THTensor_(newContiguous)(rvf_);
-
- THLapack_(gesvd)(jobu[0],jobu[0],
- m,n,THTensor_(data)(ra__),lda,
- THTensor_(data)(rs__),
- THTensor_(data)(ru__),
- ldu,
- THTensor_(data)(rv__), ldvt,
- &wkopt, -1, &info);
- lwork = (int)wkopt;
- work = THTensor_(newWithSize1d)(lwork);
- THLapack_(gesvd)(jobu[0],jobu[0],
- m,n,THTensor_(data)(ra__),lda,
- THTensor_(data)(rs__),
- THTensor_(data)(ru__),
- ldu,
- THTensor_(data)(rv__), ldvt,
- THTensor_(data)(work),lwork, &info);
-
- THLapackCheckWithCleanup(" Lapack Error %s : %d superdiagonals failed to converge.",
- THCleanup(
- THTensor_(free)(ru__);
- THTensor_(free)(rs__);
- THTensor_(free)(rv__);
- THTensor_(free)(ra__);
- THTensor_(free)(work);),
- "gesvd", info,"");
-
- if (*jobu == 'S')
- THTensor_(narrow)(rv__,NULL,1,0,k);
-
- THTensor_(freeCopyTo)(ru__, ru_);
- THTensor_(freeCopyTo)(rs__, rs_);
- THTensor_(freeCopyTo)(rv__, rvf_);
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(free)(work);
-
- if (*jobu == 'S') {
- THTensor_(narrow)(rvf_,NULL,1,0,k);
- }
- THTensor_(resizeAs)(rv_, rvf_);
- THTensor_(copy)(rv_, rvf_);
- THTensor_(free)(rvf_);
-}
-
-void THTensor_(getri)(THTensor *ra_, THTensor *a)
-{
- if (a == NULL) a = ra_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1, "A should be square");
-
- int m, n, lda, info, lwork;
- real wkopt;
- THIntTensor *ipiv;
- THTensor *work;
- THTensor *ra__ = NULL;
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- m = ra__->size[0];
- n = ra__->size[1];
- lda = m;
- ipiv = THIntTensor_newWithSize1d((long)m);
-
- /* Run LU */
- THLapack_(getrf)(n, n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &info);
- THLapackCheckWithCleanup("Lapack Error %s : U(%d,%d) is 0, U is singular",
- THCleanup(
- THTensor_(free)(ra__);
- THIntTensor_free(ipiv);),
- "getrf", info, info);
-
- /* Run inverse */
- THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &wkopt, -1, &info);
- lwork = (int)wkopt;
- work = THTensor_(newWithSize1d)(lwork);
- THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), THTensor_(data)(work), lwork, &info);
- THLapackCheckWithCleanup("Lapack Error %s : U(%d,%d) is 0, U is singular",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(work);
- THIntTensor_free(ipiv);),
- "getri", info, info);
-
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(free)(work);
- THIntTensor_free(ipiv);
-}
-
-void THTensor_(clearUpLoTriangle)(THTensor *a, const char *uplo)
-{
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1, "A should be square");
-
- int n = a->size[0];
-
- /* Build full matrix */
- real *p = THTensor_(data)(a);
- long i, j;
-
- /* Upper Triangular Case */
- if (uplo[0] == 'U')
- {
- /* Clear lower triangle (excluding diagonals) */
- for (i=0; i<n; i++) {
- for (j=i+1; j<n; j++) {
- p[n*i + j] = 0;
- }
- }
- }
- /* Lower Triangular Case */
- else if (uplo[0] == 'L')
- {
- /* Clear upper triangle (excluding diagonals) */
- for (i=0; i<n; i++) {
- for (j=0; j<i; j++) {
- p[n*i + j] = 0;
- }
- }
- }
-}
-
-void THTensor_(copyUpLoTriangle)(THTensor *a, const char *uplo)
-{
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1, "A should be square");
-
- int n = a->size[0];
-
- /* Build full matrix */
- real *p = THTensor_(data)(a);
- long i, j;
-
- /* Upper Triangular Case */
- if (uplo[0] == 'U')
- {
- /* Clear lower triangle (excluding diagonals) */
- for (i=0; i<n; i++) {
- for (j=i+1; j<n; j++) {
- p[n*i + j] = p[n*j+i];
- }
- }
- }
- /* Lower Triangular Case */
- else if (uplo[0] == 'L')
- {
- /* Clear upper triangle (excluding diagonals) */
- for (i=0; i<n; i++) {
- for (j=0; j<i; j++) {
- p[n*i + j] = p[n*j+i];
- }
- }
- }
-}
-
-void THTensor_(potrf)(THTensor *ra_, THTensor *a, const char *uplo)
-{
- if (a == NULL) a = ra_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1, "A should be square");
-
- int n, lda, info;
- THTensor *ra__ = NULL;
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- n = ra__->size[0];
- lda = n;
-
- /* Run Factorization */
- THLapack_(potrf)(uplo[0], n, THTensor_(data)(ra__), lda, &info);
- THLapackCheckWithCleanup("Lapack Error in %s : the leading minor of order %d is not positive definite",
- THCleanup(THTensor_(free)(ra__);),
- "potrf", info, "");
-
- THTensor_(clearUpLoTriangle)(ra__, uplo);
- THTensor_(freeCopyTo)(ra__, ra_);
-}
-
-void THTensor_(potrs)(THTensor *rb_, THTensor *b, THTensor *a, const char *uplo)
-{
- int free_b = 0;
- if (b == NULL) b = rb_;
-
- THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d",
- a->nDimension);
- THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 "
- "dimensions, but has %d", b->nDimension);
- THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld",
- a->size[0], a->size[1]);
- THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld "
- "rows, B has %ld", a->size[0], b->size[0]);
-
- if (b->nDimension == 1) {
- b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],
- b->stride[0], 1, 0);
- free_b = 1;
- }
-
- int n, nrhs, lda, ldb, info;
- THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS
- THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS
-
- ra__ = THTensor_(cloneColumnMajor)(NULL, a);
- rb__ = THTensor_(cloneColumnMajor)(rb_, b);
-
- n = (int)ra__->size[0];
- nrhs = (int)rb__->size[1];
- lda = n;
- ldb = n;
-
- THLapack_(potrs)(uplo[0], n, nrhs, THTensor_(data)(ra__),
- lda, THTensor_(data)(rb__), ldb, &info);
-
-
- THLapackCheckWithCleanup("Lapack Error in %s : A(%d,%d) is zero, singular A",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(rb__);
- if (free_b) THTensor_(free)(b);),
- "potrs", info, info);
-
- if (free_b) THTensor_(free)(b);
- THTensor_(free)(ra__);
- THTensor_(freeCopyTo)(rb__, rb_);
-}
-
-void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo)
-{
- if (a == NULL) a = ra_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1, "A should be square");
-
- int n, lda, info;
- THTensor *ra__ = NULL;
-
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- n = ra__->size[0];
- lda = n;
-
- /* Run inverse */
- THLapack_(potri)(uplo[0], n, THTensor_(data)(ra__), lda, &info);
- THLapackCheckWithCleanup("Lapack Error %s : A(%d,%d) is 0, A cannot be factorized",
- THCleanup(THTensor_(free)(ra__);),
- "potri", info, info);
-
- THTensor_(copyUpLoTriangle)(ra__, uplo);
- THTensor_(freeCopyTo)(ra__, ra_);
-}
-
-/*
- Computes the Cholesky factorization with complete pivoting of a real symmetric
- positive semidefinite matrix.
-
- Args:
- * `ra_` - result Tensor in which to store the factor U or L from the
- Cholesky factorization.
- * `rpiv_` - result IntTensor containing sparse permutation matrix P, encoded
- as P[rpiv_[k], k] = 1.
- * `a` - input Tensor; the input matrix to factorize.
- * `uplo` - string; specifies whether the upper or lower triangular part of
- the symmetric matrix A is stored. "U"/"L" for upper/lower
- triangular.
- * `tol` - double; user defined tolerance, or < 0 for automatic choice.
- The algorithm terminates when the pivot <= tol.
- */
-void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor *a, const char *uplo, real tol) {
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
- THArgCheck(a->size[0] == a->size[1], 1, "A should be square");
-
- int n = a->size[0];
-
- THTensor *ra__ = THTensor_(cloneColumnMajor)(ra_, a);
- THIntTensor_resize1d(rpiv_, n);
-
- // Allocate working tensor
- THTensor *work = THTensor_(newWithSize1d)(2 * n);
-
- // Run Cholesky factorization
- int lda = n;
- int rank, info;
-
- THLapack_(pstrf)(uplo[0], n, THTensor_(data)(ra__), lda,
- THIntTensor_data(rpiv_), &rank, tol,
- THTensor_(data)(work), &info);
-
- THLapackCheckWithCleanup("Lapack Error %s : matrix is rank deficient or not positive semidefinite",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(work);),
- "pstrf", info,"");
-
- THTensor_(clearUpLoTriangle)(ra__, uplo);
-
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(free)(work);
-}
-
-/*
- Perform a QR decomposition of a matrix.
-
- In LAPACK, two parts of the QR decomposition are implemented as two separate
- functions: geqrf and orgqr. For flexibility and efficiency, these are wrapped
- directly, below - but to make the common usage convenient, we also provide
- this function, which calls them both and returns the results in a more
- intuitive form.
-
- Args:
- * `rq_` - result Tensor in which to store the Q part of the decomposition.
- * `rr_` - result Tensor in which to store the R part of the decomposition.
- * `a` - input Tensor; the matrix to decompose.
-
-*/
-void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a)
-{
- int m = a->size[0];
- int n = a->size[1];
- int k = (m < n ? m : n);
- THTensor *ra_ = THTensor_(new)();
- THTensor *rtau_ = THTensor_(new)();
- THTensor *rr__ = THTensor_(new)();
- THTensor_(geqrf)(ra_, rtau_, a);
- THTensor_(resize2d)(rr__, k, ra_->size[1]);
- THTensor_(narrow)(rr__, ra_, 0, 0, k);
- THTensor_(triu)(rr_, rr__, 0);
- THTensor_(resize2d)(rq_, ra_->size[0], k);
- THTensor_(orgqr)(rq_, ra_, rtau_);
- THTensor_(narrow)(rq_, rq_, 1, 0, k);
- THTensor_(free)(ra_);
- THTensor_(free)(rtau_);
- THTensor_(free)(rr__);
-}
-
-/*
- The geqrf function does the main work of QR-decomposing a matrix.
- However, rather than producing a Q matrix directly, it produces a sequence of
- elementary reflectors which may later be composed to construct Q - for example
- with the orgqr function, below.
-
- Args:
- * `ra_` - Result matrix which will contain:
- i) The elements of R, on and above the diagonal.
- ii) Directions of the reflectors implicitly defining Q.
- * `rtau_` - Result tensor which will contain the magnitudes of the reflectors
- implicitly defining Q.
- * `a` - Input matrix, to decompose. If NULL, `ra_` is used as input.
-
- For further details, please see the LAPACK documentation.
-
-*/
-void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a)
-{
- if (a == NULL) ra_ = a;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
-
- THTensor *ra__ = NULL;
-
- /* Prepare the input for LAPACK, making a copy if necessary. */
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- int m = ra__->size[0];
- int n = ra__->size[1];
- int k = (m < n ? m : n);
- int lda = m;
- THTensor_(resize1d)(rtau_, k);
-
- /* Dry-run to query the suggested size of the workspace. */
- int info = 0;
- real wkopt = 0;
- THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda,
- THTensor_(data)(rtau_),
- &wkopt, -1, &info);
-
- /* Allocate the workspace and call LAPACK to do the real work. */
- int lwork = (int)wkopt;
- THTensor *work = THTensor_(newWithSize1d)(lwork);
- THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda,
- THTensor_(data)(rtau_),
- THTensor_(data)(work), lwork, &info);
-
- THLapackCheckWithCleanup("Lapack Error %s : unknown Lapack error. info = %i",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(work);),
- "geqrf", info,"");
-
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(free)(work);
-}
-
-/*
- The orgqr function allows reconstruction of a matrix Q with orthogonal
- columns, from a sequence of elementary reflectors, such as is produced by the
- geqrf function.
-
- Args:
- * `ra_` - result Tensor, which will contain the matrix Q.
- * `a` - input Tensor, which should be a matrix with the directions of the
- elementary reflectors below the diagonal. If NULL, `ra_` is used as
- input.
- * `tau` - input Tensor, containing the magnitudes of the elementary
- reflectors.
-
- For further details, please see the LAPACK documentation.
-
-*/
-void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau)
-{
- if (a == NULL) a = ra_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
-
- THTensor *ra__ = NULL;
- ra__ = THTensor_(cloneColumnMajor)(ra_, a);
-
- int m = ra__->size[0];
- int n = ra__->size[1];
- int k = tau->size[0];
- int lda = m;
-
- /* Dry-run to query the suggested size of the workspace. */
- int info = 0;
- real wkopt = 0;
- THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda,
- THTensor_(data)(tau),
- &wkopt, -1, &info);
-
- /* Allocate the workspace and call LAPACK to do the real work. */
- int lwork = (int)wkopt;
- THTensor *work = THTensor_(newWithSize1d)(lwork);
- THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda,
- THTensor_(data)(tau),
- THTensor_(data)(work), lwork, &info);
-
- THLapackCheckWithCleanup(" Lapack Error %s : unknown Lapack error. info = %i",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(work);),
- "orgqr", info,"");
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(free)(work);
-}
-
-/*
- The ormqr function multiplies Q with another matrix from a sequence of
- elementary reflectors, such as is produced by the geqrf function.
-
- Args:
- * `ra_` - result Tensor, which will contain the matrix Q' c.
- * `a` - input Tensor, which should be a matrix with the directions of the
- elementary reflectors below the diagonal. If NULL, `ra_` is used as
- input.
- * `tau` - input Tensor, containing the magnitudes of the elementary
- reflectors.
- * `c` - input Tensor, containing the matrix to be multiplied.
- * `side` - char, determining whether c is left- or right-multiplied with Q.
- * `trans` - char, determining whether to transpose Q before multiplying.
-
- For further details, please see the LAPACK documentation.
-
-*/
-void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans)
-{
- if (a == NULL) a = ra_;
- THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional");
-
- THTensor *ra__ = NULL;
- ra__ = THTensor_(cloneColumnMajor)(ra_, c);
-
- int m = c->size[0];
- int n = c->size[1];
- int k = tau->size[0];
- int lda;
- if (*side == 'L')
- {
- lda = m;
- }
- else
- {
- lda = n;
- }
- int ldc = m;
-
- /* Dry-run to query the suggested size of the workspace. */
- int info = 0;
- real wkopt = 0;
- THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda,
- THTensor_(data)(tau), THTensor_(data)(ra__), ldc,
- &wkopt, -1, &info);
-
- /* Allocate the workspace and call LAPACK to do the real work. */
- int lwork = (int)wkopt;
- THTensor *work = THTensor_(newWithSize1d)(lwork);
- THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda,
- THTensor_(data)(tau), THTensor_(data)(ra__), ldc,
- THTensor_(data)(work), lwork, &info);
-
- THLapackCheckWithCleanup(" Lapack Error %s : unknown Lapack error. info = %i",
- THCleanup(
- THTensor_(free)(ra__);
- THTensor_(free)(work);),
- "ormqr", info,"");
- THTensor_(freeCopyTo)(ra__, ra_);
- THTensor_(free)(work);
-}
-
-void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a)
-{
- THArgCheck(THTensor_(nDimension)(a) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(a));
- if (!pivot) {
- THError("btrifact without pivoting is not implemented on the CPU");
- }
-
- if (ra_ != a) {
- THTensor_(resizeAs)(ra_, a);
- THTensor_(copy)(ra_, a);
- }
-
- int m = a->size[1];
- int n = a->size[2];
- if (m != n) {
- THError("btrifact is only implemented for square matrices");
- }
- long num_batches = THTensor_(size)(a, 0);
- THTensor *ra__;
- int lda;
-
- if (ra_->stride[1] == 1) {
- // column ordered, what BLAS wants
- lda = ra_->stride[2];
- ra__ = ra_;
- } else {
- // not column ordered, need to make it such (requires copy)
- THTensor *transp_r_ = THTensor_(newTranspose)(ra_, 1, 2);
- ra__ = THTensor_(newClone)(transp_r_);
- THTensor_(free)(transp_r_);
- THTensor_(transpose)(ra__, NULL, 1, 2);
- lda = ra__->stride[2];
- }
-
- THTensor *ai = THTensor_(new)();
- THTensor *rai = THTensor_(new)();
- THIntTensor *rpivoti = THIntTensor_new();
-
- int info = 0;
- int *info_ptr = &info;
- if (rinfo_) {
- THIntTensor_resize1d(rinfo_, num_batches);
- info_ptr = THIntTensor_data(rinfo_);
- }
-
- THIntTensor_resize2d(rpivots_, num_batches, n);
-
- long batch = 0;
- for (; batch < num_batches; ++batch) {
- THTensor_(select)(ai, a, 0, batch);
- THTensor_(select)(rai, ra__, 0, batch);
- THIntTensor_select(rpivoti, rpivots_, 0, batch);
-
- THLapack_(getrf)(n, n, THTensor_(data)(rai), lda,
- THIntTensor_data(rpivoti), info_ptr);
- if (rinfo_) {
- info_ptr++;
- } else if (info != 0) {
- break;
- }
- }
-
- THTensor_(free)(ai);
- THTensor_(free)(rai);
- THIntTensor_free(rpivoti);
-
- if (ra__ != ra_) {
- THTensor_(freeCopyTo)(ra__, ra_);
- }
-
- if (!rinfo_ && info != 0) {
- THError("failed to factorize batch element %ld (info == %d)", batch, info);
- }
-}
-
-void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots)
-{
- THArgCheck(THTensor_(nDimension)(atf) == 3, 1, "expected 3D tensor, got %dD",
- THTensor_(nDimension)(atf));
- THArgCheck(THTensor_(nDimension)(b) == 3 ||
- THTensor_(nDimension)(b) == 2, 4, "expected 2D or 3D tensor");
- THArgCheck(THTensor_(size)(atf, 0) ==
- THTensor_(size)(b, 0), 3, "number of batches must be equal");
- THArgCheck(THTensor_(size)(atf, 1) ==
- THTensor_(size)(atf, 2), 3, "A matrices must be square");
- THArgCheck(THTensor_(size)(atf, 1) ==
- THTensor_(size)(b, 1), 3, "dimensions of A and b must be equal");
-
- if (rb_ != b) {
- THTensor_(resizeAs)(rb_, b);
- THTensor_(copy)(rb_, b);
- }
-
- long num_batches = atf->size[0];
- long n = atf->size[1];
- int nrhs = rb_->nDimension > 2 ? rb_->size[2] : 1;
-
- int lda, ldb;
- THTensor *atf_;
- THTensor *rb__;
-
- // correct ordering of A
- if (atf->stride[1] == 1) {
- // column ordered, what BLAS wants
- lda = atf->stride[2];
- atf_ = atf;
- } else {
- // not column ordered, need to make it such (requires copy)
- // it would be nice if we could use the op(A) flags to automatically
- // transpose A if needed, but this leads to unpredictable behavior if the
- // user clones A_tf later with a different ordering
- THTensor *transp_r_ = THTensor_(newTranspose)(atf, 1, 2);
- atf_ = THTensor_(newClone)(transp_r_);
- THTensor_(free)(transp_r_);
- THTensor_(transpose)(atf_, NULL, 1, 2);
- lda = atf_->stride[2];
- }
-
- // correct ordering of B
- if (rb_->stride[1] == 1) {
- // column ordered
- if (rb_->nDimension == 2 || rb_->size[2] == 1) {
- ldb = n;
- } else {
- ldb = rb_->stride[2];
- }
- rb__ = rb_;
- } else {
- // make column ordered
- if (rb_->nDimension > 2) {
- THTensor *transp_r_ = THTensor_(newTranspose)(rb_, 1, 2);
- rb__ = THTensor_(newClone)(transp_r_);
- THTensor_(free)(transp_r_);
- THTensor_(transpose)(rb__, NULL, 1, 2);
- ldb = rb__->stride[2];
- } else {
- rb__ = THTensor_(newClone)(rb_);
- ldb = n;
- }
- }
-
- THTensor *ai = THTensor_(new)();
- THTensor *rbi = THTensor_(new)();
- THIntTensor *pivoti = THIntTensor_new();
-
- if (!THIntTensor_isContiguous(pivots)) {
- THError("Error: rpivots_ is not contiguous.");
- }
-
- for (long batch = 0; batch < num_batches; ++batch) {
- THTensor_(select)(ai, atf_, 0, batch);
- THTensor_(select)(rbi, rb__, 0, batch);
- THIntTensor_select(pivoti, pivots, 0, batch);
-
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
- int info;
- THLapack_(getrs)('N', n, nrhs, THTensor_(data)(ai), lda,
- THIntTensor_data(pivoti), THTensor_(data)(rbi),
- ldb, &info);
- if (info != 0) {
- THError("Error: Nonzero info.");
- }
-#else
- THError("Unimplemented");
-#endif
- }
-
- THTensor_(free)(ai);
- THTensor_(free)(rbi);
- THIntTensor_free(pivoti);
-
- if (atf_ != atf) {
- THTensor_(free)(atf_);
- }
-
- if (rb__ != rb_) {
- THTensor_(freeCopyTo)(rb__, rb_);
- }
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h
deleted file mode 100644
index 878594348..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorLapack.h"
-#else
-
-TH_API void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_);
-TH_API void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_, const char *uplo, const char *trans, const char *diag);
-TH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_);
-TH_API void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobz, const char *uplo);
-TH_API void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr);
-TH_API void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char *jobu);
-TH_API void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char *jobu);
-TH_API void THTensor_(getri)(THTensor *ra_, THTensor *a);
-TH_API void THTensor_(potrf)(THTensor *ra_, THTensor *a, const char *uplo);
-TH_API void THTensor_(potrs)(THTensor *rb_, THTensor *b_, THTensor *a_, const char *uplo);
-TH_API void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo);
-TH_API void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a);
-TH_API void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a);
-TH_API void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau);
-TH_API void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans);
-TH_API void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor*a, const char* uplo, real tol);
-
-TH_API void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a);
-TH_API void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c
deleted file mode 100644
index db7a0cb19..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c
+++ /dev/null
@@ -1,3275 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorMath.c"
-#else
-
-#ifndef NAN
- #define NAN (nan(NULL))
-#endif
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#define TH_OMP_OVERHEAD_THRESHOLD 100000
-
-#ifdef _OPENMP
-
-#ifndef _WIN32
-#define PRAGMA(P) _Pragma(#P)
-#else
-#define PRAGMA(P) __pragma(P)
-#endif
-
-#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \
-{ \
- ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR); \
- PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \
- { \
- size_t num_threads = omp_get_num_threads(); \
- size_t tid = omp_get_thread_num(); \
- ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \
- ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \
- TH_TENSOR_offset + TH_TENSOR_size / num_threads; \
- ptrdiff_t TENSOR##_len = TH_TENSOR_end - TH_TENSOR_offset; \
- TYPE *TENSOR##_data = THTensor_(data)(TENSOR) + TH_TENSOR_offset; \
- CODE \
- } \
-}
-#else
-#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \
-{ \
- TYPE *TENSOR##_data = THTensor_(data)(TENSOR); \
- ptrdiff_t TENSOR##_len = THTensor_(nElement)(TENSOR); \
- CODE \
-}
-#endif
-
-#ifdef _OPENMP
-#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
-{ \
- ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \
- PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \
- { \
- size_t num_threads = omp_get_num_threads(); \
- size_t tid = omp_get_thread_num(); \
- ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \
- ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \
- TH_TENSOR_offset + TH_TENSOR_size / num_threads; \
- ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \
- TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \
- TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \
- CODE \
- } \
-}
-#else
-#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \
-{ \
- TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \
- TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \
- ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \
- CODE \
-}
-#endif
-
-#ifdef _OPENMP
-#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
-{ \
- ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \
- PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \
- { \
- size_t num_threads = omp_get_num_threads(); \
- size_t tid = omp_get_thread_num(); \
- ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \
- ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \
- TH_TENSOR_offset + TH_TENSOR_size / num_threads; \
- ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \
- TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \
- TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \
- TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3) + TH_TENSOR_offset; \
- CODE \
- } \
-}
-#else
-#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \
-{ \
- TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \
- TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \
- TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3); \
- ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \
- CODE \
-}
-#endif
-
-void THTensor_(fill)(THTensor *r_, real value)
-{
- if (THTensor_(isContiguous)(r_) || THTensor_(isTransposed)(r_)) {
- TH_TENSOR_APPLY_CONTIG(real, r_, THVector_(fill)(r__data, value, r__len););
- } else {
- TH_TENSOR_APPLY(real, r_,
- if (r__stride == 1) {
- THVector_(fill)(r__data, value, r__size);
- r__i = r__size;
- r__data += r__stride * r__size;
- break;
- } else {
- *r__data = value;
- }
- );
- }
-}
-
-void THTensor_(zero)(THTensor *r_)
-{
- THTensor_(fill)(r_, 0);
-}
-
-void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value)
-{
- TH_TENSOR_APPLY2(real, tensor, unsigned char, mask,
- if (*mask_data > 1)
- {
- THFree(mask_counter);
- THFree(tensor_counter);
- THError("Mask tensor can take 0 and 1 values only");
- }
- else if (*mask_data == 1)
- {
- *tensor_data = value;
- });
-}
-
-void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src )
-{
- THTensor *srct = THTensor_(newContiguous)(src);
- real *src_data = THTensor_(data)(srct);
- ptrdiff_t cntr = 0;
- ptrdiff_t nelem = THTensor_(nElement)(srct);
- if (THTensor_(nElement)(tensor) != THByteTensor_nElement(mask))
- {
- THTensor_(free)(srct);
- THError("Number of elements of destination tensor != Number of elements in mask");
- }
- TH_TENSOR_APPLY2(real, tensor, unsigned char, mask,
- if (*mask_data > 1)
- {
- THTensor_(free)(srct);
- THFree(mask_counter);
- THFree(tensor_counter);
- THError("Mask tensor can take 0 and 1 values only");
- }
- else if (*mask_data == 1)
- {
- if (cntr == nelem)
- {
- THTensor_(free)(srct);
- THFree(mask_counter);
- THFree(tensor_counter);
- THError("Number of elements of src < number of ones in mask");
- }
- *tensor_data = *src_data;
- src_data++;
- cntr++;
- });
- THTensor_(free)(srct);
-}
-
-void THTensor_(maskedSelect)(THTensor *tensor, THTensor *src, THByteTensor *mask)
-{
- ptrdiff_t numel = THByteTensor_sumall(mask);
- real *tensor_data;
-
-#ifdef DEBUG
- THAssert(numel <= LONG_MAX);
-#endif
- THTensor_(resize1d)(tensor,numel);
- tensor_data = THTensor_(data)(tensor);
- TH_TENSOR_APPLY2(real, src, unsigned char, mask,
- if (*mask_data > 1)
- {
- THFree(mask_counter);
- THFree(src_counter);
- THError("Mask tensor can take 0 and 1 values only");
- }
- else if (*mask_data == 1)
- {
- *tensor_data = *src_data;
- tensor_data++;
- });
-}
-
-// Finds non-zero elements of a tensor and returns their subscripts
-void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor)
-{
- ptrdiff_t numel = 0;
- long *subscript_data;
- long i = 0;
- long dim;
- long div = 1;
-#ifdef TH_REAL_IS_HALF
-#define IS_NONZERO(val) ((val.x & 0x7fff) != 0)
-#else
-#define IS_NONZERO(val) ((val)!=0)
-#endif
-
- /* First Pass to determine size of subscripts */
- TH_TENSOR_APPLY(real, tensor,
- if IS_NONZERO(*tensor_data) {
- ++numel;
- });
-#ifdef DEBUG
- THAssert(numel <= LONG_MAX);
-#endif
- THLongTensor_resize2d(subscript, numel, tensor->nDimension);
-
- /* Second pass populates subscripts */
- subscript_data = THLongTensor_data(subscript);
- TH_TENSOR_APPLY(real, tensor,
- if IS_NONZERO(*tensor_data) {
- div = 1;
-
- for (dim = tensor->nDimension - 1; dim >= 0; dim--) {
- *(subscript_data + dim) = (i/div) % tensor->size[dim];
- div *= tensor->size[dim];
- }
-
- subscript_data += tensor->nDimension;
- }
- ++i;);
-}
-
-void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)
-{
- ptrdiff_t i, numel;
- THLongStorage *newSize;
- THTensor *tSlice, *sSlice;
- long *index_data;
- real *tensor_data, *src_data;
-
- THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
- THArgCheck(dim < src->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE);
- THArgCheck(src->nDimension > 0,2,"Source tensor is empty");
-
- numel = THLongTensor_nElement(index);
-
- newSize = THLongStorage_newWithSize(src->nDimension);
- THLongStorage_rawCopy(newSize,src->size);
-#ifdef DEBUG
- THAssert(numel <= LONG_MAX);
-#endif
- newSize->data[dim] = numel;
- THTensor_(resize)(tensor,newSize,NULL);
- THLongStorage_free(newSize);
-
- index = THLongTensor_newContiguous(index);
- index_data = THLongTensor_data(index);
-
- if (dim == 0 && THTensor_(isContiguous)(src) && THTensor_(isContiguous)(tensor))
- {
- tensor_data = THTensor_(data)(tensor);
- src_data = THTensor_(data)(src);
- ptrdiff_t rowsize = THTensor_(nElement)(src) / src->size[0];
-
- // check that the indices are within range
- long max = src->size[0] - 1 + TH_INDEX_BASE;
- for (i=0; i<numel; i++) {
- if (index_data[i] < TH_INDEX_BASE || index_data[i] > max) {
- THLongTensor_free(index);
- THError("index out of range");
- }
- }
-
- if (src->nDimension == 1) {
- #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i)
- for (i=0; i<numel; i++)
- tensor_data[i] = src_data[index_data[i] - TH_INDEX_BASE];
- } else {
- #pragma omp parallel for if(numel*rowsize > TH_OMP_OVERHEAD_THRESHOLD) private(i)
- for (i=0; i<numel; i++)
- memcpy(tensor_data + i*rowsize, src_data + (index_data[i] - TH_INDEX_BASE)*rowsize, rowsize*sizeof(real));
- }
- }
- else if (src->nDimension == 1)
- {
- for (i=0; i<numel; i++)
- THTensor_(set1d)(tensor,i,THTensor_(get1d)(src,index_data[i] - TH_INDEX_BASE));
- }
- else
- {
- for (i=0; i<numel; i++)
- {
- tSlice = THTensor_(new)();
- sSlice = THTensor_(new)();
- THTensor_(select)(tSlice, tensor, dim, i);
- THTensor_(select)(sSlice, src, dim, index_data[i] - TH_INDEX_BASE);
- THTensor_(copy)(tSlice, sSlice);
- THTensor_(free)(tSlice);
- THTensor_(free)(sSlice);
- }
- }
-
- THLongTensor_free(index);
-}
-
-void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
-{
- ptrdiff_t i, numel;
- THTensor *tSlice, *sSlice;
- long *index_data;
-
- numel = THLongTensor_nElement(index);
- THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
- THArgCheck(dim < src->nDimension, 4, "Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE);
- THArgCheck(numel == src->size[dim],4,"Number of indices should be equal to source:size(dim)");
-
- index = THLongTensor_newContiguous(index);
- index_data = THLongTensor_data(index);
-
- if (tensor->nDimension > 1 )
- {
- tSlice = THTensor_(new)();
- sSlice = THTensor_(new)();
-
- for (i=0; i<numel; i++)
- {
- THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE);
- THTensor_(select)(sSlice, src, dim, i);
- THTensor_(copy)(tSlice, sSlice);
- }
-
- THTensor_(free)(tSlice);
- THTensor_(free)(sSlice);
- }
- else
- {
- for (i=0; i<numel; i++)
- {
- THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, THTensor_(get1d)(src,i));
- }
- }
- THLongTensor_free(index);
-}
-
-void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
-{
- ptrdiff_t i, numel;
- THTensor *tSlice, *sSlice;
- long *index_data;
-
- numel = THLongTensor_nElement(index);
- THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
- THArgCheck(dim < src->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE);
- THArgCheck(numel == src->size[dim],4,"Number of indices should be equal to source:size(dim)");
-
- index = THLongTensor_newContiguous(index);
- index_data = THLongTensor_data(index);
-
- if (tensor->nDimension > 1)
- {
- tSlice = THTensor_(new)();
- sSlice = THTensor_(new)();
-
- for (i=0; i<numel; i++)
- {
- THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE);
- THTensor_(select)(sSlice, src, dim, i);
- THTensor_(cadd)(tSlice, tSlice, 1.0, sSlice);
- }
-
- THTensor_(free)(tSlice);
- THTensor_(free)(sSlice);
- }
- else
- {
- for (i=0; i<numel; i++)
- {
- THTensor_(set1d)(tensor,
- index_data[i] - TH_INDEX_BASE,
- THTensor_(get1d)(src,i) + THTensor_(get1d)(tensor,index_data[i] - TH_INDEX_BASE));
- }
- }
- THLongTensor_free(index);
-}
-
-/*
- * Fills the slices of tensor addressed by `index` along `dim` with `val`.
- *
- * tensor: destination, modified in place.
- * dim:    dimension along which the indices select slices.
- * index:  vector of indices carrying the TH_INDEX_BASE offset.
- * val:    value written to every addressed element.
- *
- * The slice header is created once and re-pointed by select on each
- * iteration (the same pattern indexAdd uses) instead of being allocated and
- * freed per index.
- */
-void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val)
-{
-  ptrdiff_t i, numel;
-  THTensor *tSlice;
-  long *index_data;
-
-  numel = THLongTensor_nElement(index);
-  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
-  THArgCheck(dim < tensor->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE);
-
-  /* Read the indices through a contiguous handle; released at the end. */
-  index = THLongTensor_newContiguous(index);
-  index_data = THLongTensor_data(index);
-
-  if (tensor->nDimension > 1)
-  {
-    tSlice = THTensor_(new)();
-
-    for (i=0; i<numel; i++)
-    {
-      THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE);
-      THTensor_(fill)(tSlice, val);
-    }
-
-    THTensor_(free)(tSlice);
-  }
-  else
-  {
-    /* Vector case: write each addressed element directly. */
-    for (i=0; i<numel; i++)
-    {
-      THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, val);
-    }
-  }
-  THLongTensor_free(index);
-}
-/* Gathers values from src into tensor along `dim`: for each position i along
- * `dim`, the index read from `index` picks which src element is copied into
- * tensor at position i.
- *
- * tensor, src and index must all have the same number of dimensions and
- * `dim` must be a valid dimension of tensor. Raises "Invalid index in gather"
- * when an index lies outside [TH_INDEX_BASE, src_size + TH_INDEX_BASE).
- *
- * NOTE(review): index_data, index_stride, src_data, src_stride, src_size,
- * tensor_data and tensor_stride are not declared here; presumably they are
- * introduced by TH_TENSOR_DIM_APPLY3 - confirm against the macro.
- */
-void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)
-{
- long elems_per_row, i, idx;
-
- THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 2,
- "Input tensor must have same dimensions as output tensor");
- THArgCheck(dim < THTensor_(nDimension)(tensor), 3, "Index dimension is out of bounds");
- THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(src), 4,
- "Index tensor must have same dimensions as input tensor");
-
- elems_per_row = THLongTensor_size(index, dim); /* number of indices along dim */
-
- TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim,
- for (i = 0; i < elems_per_row; ++i)
- {
- idx = *(index_data + i*index_stride);
- if (idx < TH_INDEX_BASE || idx >= src_size + TH_INDEX_BASE)
- {
- /* presumably the macro's own counter buffer, released before raising - TODO confirm */
- THFree(TH_TENSOR_DIM_APPLY_counter);
- THError("Invalid index in gather");
- }
- *(tensor_data + i*tensor_stride) = src_data[(idx - TH_INDEX_BASE) * src_stride];
- })
-}
-/* Scatters values from src into tensor along `dim`: for each position i
- * along `dim`, the src element at i is written to the tensor position picked
- * by the index read from `index`.
- *
- * index and src must have the same number of dimensions as tensor and `dim`
- * must be a valid dimension of tensor. Raises "Invalid index in scatter"
- * when an index lies outside [TH_INDEX_BASE, tensor_size + TH_INDEX_BASE).
- *
- * NOTE(review): the *_data, *_stride and tensor_size names are presumably
- * introduced by TH_TENSOR_DIM_APPLY3 - confirm against the macro.
- */
-void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
-{
- long elems_per_row, i, idx;
-
- THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds");
- THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3,
- "Index tensor must have same dimensions as output tensor");
- THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4,
- "Input tensor must have same dimensions as output tensor");
-
- elems_per_row = THLongTensor_size(index, dim); /* number of indices along dim */
-
- TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim,
- for (i = 0; i < elems_per_row; ++i)
- {
- idx = *(index_data + i*index_stride);
- if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE)
- {
- /* presumably the macro's own counter buffer, released before raising - TODO confirm */
- THFree(TH_TENSOR_DIM_APPLY_counter);
- THError("Invalid index in scatter");
- }
- tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = *(src_data + i*src_stride);
- })
-}
-/* Same traversal as scatter, but each src element is added (+=) to the
- * addressed tensor element instead of overwriting it.
- *
- * index and src must have the same number of dimensions as tensor and `dim`
- * must be a valid dimension of tensor. Raises "Invalid index in scatterAdd"
- * when an index lies outside [TH_INDEX_BASE, tensor_size + TH_INDEX_BASE).
- *
- * NOTE(review): the *_data, *_stride and tensor_size names are presumably
- * introduced by TH_TENSOR_DIM_APPLY3 - confirm against the macro.
- */
-void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)
-{
- long elems_per_row, i, idx;
-
- THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds");
- THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3,
- "Index tensor must have same dimensions as output tensor");
- THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4,
- "Input tensor must have same dimensions as output tensor");
-
- elems_per_row = THLongTensor_size(index, dim); /* number of indices along dim */
-
- TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim,
- for (i = 0; i < elems_per_row; ++i)
- {
- idx = *(index_data + i*index_stride);
- if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE)
- {
- /* presumably the macro's own counter buffer, released before raising - TODO confirm */
- THFree(TH_TENSOR_DIM_APPLY_counter);
- THError("Invalid index in scatterAdd");
- }
- tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] += *(src_data + i*src_stride);
- })
-}
-/* Writes the scalar `val` to every tensor position addressed by `index`
- * along `dim`.
- *
- * index must have the same number of dimensions as tensor and `dim` must be
- * a valid dimension of tensor. Raises "Invalid index in scatter" (the message
- * does not name scatterFill) when an index lies outside
- * [TH_INDEX_BASE, tensor_size + TH_INDEX_BASE).
- *
- * NOTE(review): index_data, index_stride, tensor_data, tensor_stride and
- * tensor_size are presumably introduced by TH_TENSOR_DIM_APPLY2 - confirm.
- */
-void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val)
-{
- long elems_per_row, i, idx;
-
- THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds");
- THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3,
- "Index tensor must have same dimensions as output tensor");
-
- elems_per_row = THLongTensor_size(index, dim); /* number of indices along dim */
-
- TH_TENSOR_DIM_APPLY2(real, tensor, long, index, dim,
- for (i = 0; i < elems_per_row; ++i)
- {
- idx = *(index_data + i*index_stride);
- if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE)
- {
- /* presumably the macro's own counter buffer, released before raising - TODO confirm */
- THFree(TH_TENSOR_DIM_APPLY_counter);
- THError("Invalid index in scatter");
- }
- tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = val;
- })
-}
-/* Returns the dot product of tensor and src, accumulated in accreal.
- *
- * Each pass takes the shorter of the two remaining runs (sz), hands that
- * chunk to THBlas_(dot) with the current strides, then advances both
- * counters and data pointers by sz by hand.
- *
- * NOTE(review): the *_size, *_i, *_data and *_stride names and the effect of
- * the trailing `break` depend on how TH_TENSOR_APPLY2 is written - confirm
- * against the macro before changing the statement order.
- */
-accreal THTensor_(dot)(THTensor *tensor, THTensor *src)
-{
- accreal sum = 0;
- /* we use a trick here. careful with that. */
- TH_TENSOR_APPLY2(real, tensor, real, src,
- long sz = (tensor_size-tensor_i < src_size-src_i ? tensor_size-tensor_i : src_size-src_i);
- sum += THBlas_(dot)(sz, src_data, src_stride, tensor_data, tensor_stride);
- tensor_i += sz;
- src_i += sz;
- tensor_data += sz*tensor_stride;
- src_data += sz*src_stride;
- break;);
- return sum;
-}
-
-/* th_isnan(val): expands to isnan(val) when real is float or double, and to
- * the constant 0 for every other element type. */
-#undef th_isnan
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-#define th_isnan(val) \
-(isnan(val))
-#else
-#define th_isnan(val) (0)
-#endif
-/* th_isnan_break(val): for float/double builds expands to a complete
- * statement that breaks out of the enclosing loop when val is NaN; expands
- * to nothing for other element types. The expansion carries its own
- * semicolon, so call sites write it without one. */
-#undef th_isnan_break
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-#define th_isnan_break(val) \
-if (isnan(val)) break;
-#else
-#define th_isnan_break(val)
-#endif
-/* Returns the smallest element of tensor.
- *
- * Requires tensor->nDimension > 0. The running minimum is seeded with the
- * first element of the data buffer. The comparison is written as
- * !(value >= theMin) so that a NaN value also replaces the running minimum;
- * for float/double builds th_isnan_break then stops the scan, so the result
- * is that NaN.
- */
-real THTensor_(minall)(THTensor *tensor)
-{
- real theMin;
- real value;
-
- THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension");
- theMin = THTensor_(data)(tensor)[0];
- TH_TENSOR_APPLY(real, tensor,
- value = *tensor_data;
- /* This is not the same as value<theMin in the case of NaNs */
- if(!(value >= theMin))
- {
- theMin = value;
- th_isnan_break(value)
- });
- return theMin;
-}
-/* Returns the largest element of tensor.
- *
- * Requires tensor->nDimension > 0. The running maximum is seeded with the
- * first element of the data buffer. The comparison is written as
- * !(value <= theMax) so that a NaN value also replaces the running maximum;
- * for float/double builds th_isnan_break then stops the scan, so the result
- * is that NaN.
- */
-real THTensor_(maxall)(THTensor *tensor)
-{
- real theMax;
- real value;
-
- THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension");
- theMax = THTensor_(data)(tensor)[0];
- TH_TENSOR_APPLY(real, tensor,
- value = *tensor_data;
- /* This is not the same as value>theMax in the case of NaNs */
- if(!(value <= theMax))
- {
- theMax = value;
- th_isnan_break(value)
- });
- return theMax;
-}
-
-static void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride);
-
-/*
- * Returns the element that quickselectnoidx leaves at position
- * (numel-1) >> 1 of a clone of tensor. The selection runs on the clone, so
- * the caller's tensor is not reordered. Requires tensor->nDimension > 0.
- */
-real THTensor_(medianall)(THTensor *tensor)
-{
-  THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension");
-
-  ptrdiff_t count = THTensor_(nElement)(tensor);
-  long rank = (count-1) >> 1;
-
-  THTensor *scratch = THTensor_(newClone)(tensor);
-  real *values = THTensor_(data)(scratch);
-  real picked;
-
-  THTensor_(quickselectnoidx)(values, rank, count, 1);
-  picked = values[rank];
-
-  THTensor_(free)(scratch);
-  return picked;
-}
-
-/* Returns the sum of all elements of tensor, accumulated in accreal
- * (0 when the traversal visits no element). */
-accreal THTensor_(sumall)(THTensor *tensor)
-{
-  accreal total = 0;
-
-  TH_TENSOR_APPLY(real, tensor,
-                  total += *tensor_data;);
-
-  return total;
-}
-
-/* Returns the product of all elements of tensor, accumulated in accreal
- * (1 when the traversal visits no element). */
-accreal THTensor_(prodall)(THTensor *tensor)
-{
-  accreal running = 1;
-
-  TH_TENSOR_APPLY(real, tensor,
-                  running *= *tensor_data;);
-
-  return running;
-}
-
-/* r_ = t + value, element-wise. r_ is resized to the shape of t first. */
-void THTensor_(add)(THTensor *r_, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  /* Strided tensors take the generic element-by-element walk. */
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;);
-  }
-  else
-  {
-    /* Both operands are dense: hand whole runs to the vector kernel. */
-    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len););
-  }
-}
-
-/* r_ = t - value, implemented as an add of the negated scalar. */
-void THTensor_(sub)(THTensor *r_, THTensor *t, real value)
-{
-  const real negated = -value;
-
-  THTensor_(add)(r_, t, negated);
-}
-
-/* r_ = t * value, element-wise. r_ is resized to the shape of t first. */
-void THTensor_(mul)(THTensor *r_, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  /* Strided tensors take the generic element-by-element walk. */
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;);
-  }
-  else
-  {
-    /* Both operands are dense: hand whole runs to the vector kernel. */
-    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len););
-  }
-}
-
-/* r_ = t / value, element-wise. r_ is resized to the shape of t first. */
-void THTensor_(div)(THTensor *r_, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  /* Strided tensors take the generic element-by-element walk. */
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;);
-  }
-  else
-  {
-    /* Both operands are dense: hand whole runs to the vector kernel. */
-    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len););
-  }
-}
-/* r_ = t << value, element-wise.
- *
- * float/double builds: implemented as a multiplication by 2^value.
- * half builds: raises an error, the operation is not supported.
- * integer builds: r_ is resized like t and each element is shifted left
- * after a cast to `unsigned real` (plain `real` for byte builds). A flat
- * loop is used when both tensors are contiguous with equal element counts;
- * otherwise the generic TH_TENSOR_APPLY2 walk is used.
- */
-void THTensor_(lshift)(THTensor *r_, THTensor *t, real value)
-{
-#if defined(TH_REAL_IS_FLOAT)
- return THTensor_(mul)(r_, t, powf(2, value));
-#elif defined(TH_REAL_IS_DOUBLE)
- return THTensor_(mul)(r_, t, pow(2, value));
-#elif defined(TH_REAL_IS_HALF)
- return THError("lshift is not supported for torch.HalfTensor");
-#else
- THTensor_(resizeAs)(r_, t);
- if (THTensor_(isContiguous)(r_) &&
- THTensor_(isContiguous)(t) &&
- THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
- real *tp = THTensor_(data)(t);
- real *rp = THTensor_(data)(r_);
- long sz = THTensor_(nElement)(t);
- long i;
- #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
- for (i=0; i<sz; i++) {
-#if defined(TH_REAL_IS_BYTE)
- rp[i] = ((real) tp[i]) << value;
-#else
- rp[i] = ((unsigned real) tp[i]) << value;
-#endif
- }
- } else {
-#if defined(TH_REAL_IS_BYTE)
- TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) << value););
-#else
- TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) << value););
-#endif
- }
-#endif
-}
-/* r_ = t >> value, element-wise.
- *
- * float/double builds: implemented as a division by 2^value.
- * half builds: raises an error, the operation is not supported.
- * integer builds: r_ is resized like t and each element is shifted right
- * after a cast to `unsigned real` (plain `real` for byte builds). A flat
- * loop is used when both tensors are contiguous with equal element counts;
- * otherwise the generic TH_TENSOR_APPLY2 walk is used.
- */
-void THTensor_(rshift)(THTensor *r_, THTensor *t, real value)
-{
-#if defined(TH_REAL_IS_FLOAT)
- return THTensor_(div)(r_, t, powf(2, value));
-#elif defined(TH_REAL_IS_DOUBLE)
- return THTensor_(div)(r_, t, pow(2, value));
-#elif defined(TH_REAL_IS_HALF)
- return THError("rshift is not supported for torch.HalfTensor");
-#else
- THTensor_(resizeAs)(r_, t);
- if (THTensor_(isContiguous)(r_) &&
- THTensor_(isContiguous)(t) &&
- THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
- real *tp = THTensor_(data)(t);
- real *rp = THTensor_(data)(r_);
- long sz = THTensor_(nElement)(t);
- long i;
- #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)
- for (i=0; i<sz; i++) {
-#if defined(TH_REAL_IS_BYTE)
- rp[i] = ((real) tp[i]) >> value;
-#else
- rp[i] = ((unsigned real) tp[i]) >> value;
-#endif
- }
- } else {
-#if defined(TH_REAL_IS_BYTE)
- TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) >> value););
-#else
- TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) >> value););
-#endif
- }
-#endif
-}
-
-/*
- * r_ = fmod(t, value), element-wise: the C fmod() for float/double builds,
- * the % operator for every other element type. r_ is resized like t.
- */
-void THTensor_(fmod)(THTensor *r_, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    /* Strided operands: generic element-by-element walk. */
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = fmod(*t_data, value););
-#else
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data % value););
-#endif
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *in = THTensor_(data)(t);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-      out[k] = fmod(in[k], value);
-#else
-      out[k] = in[k] % value;
-#endif
-    }
-  }
-}
-
-/*
- * r_ = t mod value, element-wise, with the result taking the sign of
- * `value`. r_ is resized like t.
- *
- * float/double builds: t - value * floor(t / value), or NAN when value == 0.
- * integer builds: the truncated % result is moved into the divisor's sign
- * range by adding `value` when the two signs differ. The sign test compares
- * signs directly instead of testing `remainder * value < 0`, because that
- * product can overflow a signed element type (undefined behaviour).
- * Integer builds do not guard against value == 0.
- */
-void THTensor_(remainder)(THTensor *r_, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r_, t);
-  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {
-    real *tp = THTensor_(data)(t);
-    real *rp = THTensor_(data)(r_);
-    ptrdiff_t sz = THTensor_(nElement)(t);
-    ptrdiff_t i;
-    #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
-    for (i=0; i<sz; i++) {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-      rp[i] = (value == 0)? NAN : tp[i] - value * floor(tp[i] / value);
-#else
-      /* There is no NAN for integers */
-      rp[i] = tp[i] % value;
-      /* non-zero remainder whose sign differs from the divisor's */
-      if (rp[i] != 0 && ((rp[i] < 0) != (value < 0)))
-        rp[i] += value;
-#endif
-    }
-  } else {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value););
-#else
-    /* There is no NAN for integers */
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data % value;
-                     if (*r__data != 0 && ((*r__data < 0) != (value < 0))) *r__data += value;);
-#endif
-  }
-}
-
-/*
- * r_ = t & value, element-wise, for integer element types; float, double
- * and half builds raise an error instead. r_ is resized like t.
- */
-void THTensor_(bitand)(THTensor *r_, THTensor *t, real value)
-{
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
-  return THError("bitand is only supported for integer type tensors");
-#else
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) ||
-      !THTensor_(isContiguous)(t) ||
-      THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data & value;);
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *in = THTensor_(data)(t);
-    real *out = THTensor_(data)(r_);
-    long total = THTensor_(nElement)(t);
-    long k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD * 100) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = in[k] & value;
-    }
-  }
-#endif
-}
-
-/*
- * r_ = t | value, element-wise, for integer element types; float, double
- * and half builds raise an error instead. r_ is resized like t.
- */
-void THTensor_(bitor)(THTensor *r_, THTensor *t, real value)
-{
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
-  return THError("bitor is only supported for integer type tensors");
-#else
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) ||
-      !THTensor_(isContiguous)(t) ||
-      THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data | value;);
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *in = THTensor_(data)(t);
-    real *out = THTensor_(data)(r_);
-    long total = THTensor_(nElement)(t);
-    long k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD * 100) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = in[k] | value;
-    }
-  }
-#endif
-}
-
-/*
- * r_ = t ^ value, element-wise, for integer element types; float, double
- * and half builds raise an error instead. r_ is resized like t.
- */
-void THTensor_(bitxor)(THTensor *r_, THTensor *t, real value)
-{
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
-  return THError("bitxor is only supported for integer type tensors");
-#else
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) ||
-      !THTensor_(isContiguous)(t) ||
-      THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data ^ value;);
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *in = THTensor_(data)(t);
-    real *out = THTensor_(data)(r_);
-    long total = THTensor_(nElement)(t);
-    long k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD * 100) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = in[k] ^ value;
-    }
-  }
-#endif
-}
-
-/*
- * r_ = t with every element limited to [min_value, max_value]: values below
- * min_value become min_value, values above max_value become max_value.
- * The lower bound is tested first. r_ is resized like t.
- */
-void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? max_value : *t_data););
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *in = THTensor_(data)(t);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-      if (in[k] < min_value)
-        out[k] = min_value;
-      else if (in[k] > max_value)
-        out[k] = max_value;
-      else
-        out[k] = in[k];
-    }
-  }
-}
-
-/*
- * r_ = t + value * src, element-wise. r_ is resized like t.
- * When all three tensors are contiguous and r_ has as many elements as src,
- * the in-place case (r_ == t) goes through BLAS axpy and the out-of-place
- * case through the vector kernel; everything else uses the generic walk.
- */
-void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || !THTensor_(isContiguous)(src) || THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data + value * *src_data;);
-    return;
-  }
-
-  if (r_ != t)
-  {
-    TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cadd)(r__data, t_data, src_data, value, r__len););
-  }
-  else
-  {
-    THBlas_(axpy)(THTensor_(nElement)(t), value, THTensor_(data)(src), 1, THTensor_(data)(r_), 1);
-  }
-}
-
-/* r_ = t - value * src, implemented as cadd with the negated scalar. */
-void THTensor_(csub)(THTensor *r_, THTensor *t, real value,THTensor *src)
-{
-  const real negated = -value;
-
-  THTensor_(cadd)(r_, t, negated, src);
-}
-
-/* r_ = t * src, element-wise. r_ is resized like t. */
-void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || !THTensor_(isContiguous)(src) || THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * *src_data;);
-  }
-  else
-  {
-    /* Dense operands: hand whole runs to the vector kernel. */
-    TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cmul)(r__data, t_data, src_data, r__len););
-  }
-}
-
-/* r_ = pow(t, src), element-wise. r_ is resized like t. */
-void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || !THTensor_(isContiguous)(src) || THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = pow(*t_data, *src_data););
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *base = THTensor_(data)(t);
-    real *expo = THTensor_(data)(src);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = pow(base[k], expo[k]);
-    }
-  }
-}
-
-/* r_ = t / src, element-wise. r_ is resized like t. */
-void THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || !THTensor_(isContiguous)(src) || THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / *src_data;);
-  }
-  else
-  {
-    /* Dense operands: hand whole runs to the vector kernel. */
-    TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cdiv)(r__data, t_data, src_data, r__len););
-  }
-}
-/* r_ = t << src, element-wise, with a per-element shift count taken from src.
- *
- * half builds: raises an error, the operation is not supported.
- * float/double builds: t * 2^src via powf()/pow().
- * integer builds: shift after a cast to `unsigned real` (plain `real` for
- * byte builds).
- * r_ is resized like t. A flat loop is used when all three tensors are
- * contiguous and r_ has as many elements as src; otherwise the generic
- * TH_TENSOR_APPLY3 walk is used.
- */
-void THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src)
-{
-#if defined(TH_REAL_IS_HALF)
- return THError("clshift is not supported for torch.HalfTensor");
-#endif
- THTensor_(resizeAs)(r_, t);
- if (THTensor_(isContiguous)(r_) &&
- THTensor_(isContiguous)(t) &&
- THTensor_(isContiguous)(src) &&
- THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {
- real *tp = THTensor_(data)(t);
- real *sp = THTensor_(data)(src);
- real *rp = THTensor_(data)(r_);
- ptrdiff_t sz = THTensor_(nElement)(t);
- ptrdiff_t i;
- #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
- for (i=0; i<sz; i++) {
-#if defined(TH_REAL_IS_FLOAT)
- rp[i] = tp[i] * powf(2, sp[i]);
-#elif defined(TH_REAL_IS_DOUBLE)
- rp[i] = tp[i] * pow(2, sp[i]);
-#elif defined(TH_REAL_IS_BYTE)
- rp[i] = ((real) tp[i]) << sp[i];
-#else
- rp[i] = ((unsigned real) tp[i]) << sp[i];
-#endif
- }
- } else {
-#if defined(TH_REAL_IS_FLOAT)
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * powf(2, *src_data););
-#elif defined(TH_REAL_IS_DOUBLE)
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * pow(2, *src_data););
-#elif defined(TH_REAL_IS_BYTE)
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) << *src_data;);
-#else
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) << *src_data;);
-#endif
- }
-}
-/* r_ = t >> src, element-wise, with a per-element shift count taken from src.
- *
- * half builds: raises an error, the operation is not supported.
- * float/double builds: t / 2^src via powf()/pow().
- * integer builds: shift after a cast to `unsigned real` (plain `real` for
- * byte builds).
- * r_ is resized like t. A flat loop is used when all three tensors are
- * contiguous and r_ has as many elements as src; otherwise the generic
- * TH_TENSOR_APPLY3 walk is used.
- */
-void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src)
-{
-#if defined(TH_REAL_IS_HALF)
- return THError("crshift is not supported for torch.HalfTensor");
-#endif
- THTensor_(resizeAs)(r_, t);
- if (THTensor_(isContiguous)(r_) &&
- THTensor_(isContiguous)(t) &&
- THTensor_(isContiguous)(src) &&
- THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {
- real *tp = THTensor_(data)(t);
- real *sp = THTensor_(data)(src);
- real *rp = THTensor_(data)(r_);
- ptrdiff_t sz = THTensor_(nElement)(t);
- ptrdiff_t i;
- #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
- for (i=0; i<sz; i++) {
-#if defined(TH_REAL_IS_FLOAT)
- rp[i] = tp[i] / powf(2, sp[i]);
-#elif defined(TH_REAL_IS_DOUBLE)
- rp[i] = tp[i] / pow(2, sp[i]);
-#elif defined(TH_REAL_IS_BYTE)
- rp[i] = ((real) tp[i]) >> sp[i];
-#else
- rp[i] = ((unsigned real) tp[i]) >> sp[i];
-#endif
- }
- } else {
-#if defined(TH_REAL_IS_FLOAT)
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / powf(2, *src_data););
-#elif defined(TH_REAL_IS_DOUBLE)
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / pow(2, *src_data););
-#elif defined(TH_REAL_IS_BYTE)
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) >> *src_data;);
-#else
- TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) >> *src_data;);
-#endif
- }
-}
-
-/*
- * r_ = fmod(t, src), element-wise: the C fmod() for float/double builds,
- * the % operator for every other element type. r_ is resized like t.
- */
-void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || !THTensor_(isContiguous)(src) || THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = fmod(*t_data, *src_data););
-#else
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = (*t_data % *src_data););
-#endif
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *num = THTensor_(data)(t);
-    real *den = THTensor_(data)(src);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-      out[k] = fmod(num[k], den[k]);
-#else
-      out[k] = num[k] % den[k];
-#endif
-    }
-  }
-}
-
-/*
- * r_ = t mod src, element-wise, with each result taking the sign of the
- * matching src element. r_ is resized like t.
- *
- * float/double builds: t - src * floor(t / src), or NAN where src == 0.
- * integer builds: the truncated % result is moved into the divisor's sign
- * range by adding the divisor when the two signs differ. The sign test
- * compares signs directly instead of testing `remainder * divisor < 0`,
- * because that product can overflow a signed element type (undefined
- * behaviour). Integer builds do not guard against zero divisors.
- */
-void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r_, t);
-  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {
-    real *tp = THTensor_(data)(t);
-    real *sp = THTensor_(data)(src);
-    real *rp = THTensor_(data)(r_);
-    ptrdiff_t sz = THTensor_(nElement)(t);
-    ptrdiff_t i;
-    #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)
-    for (i=0; i<sz; i++) {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-      rp[i] = (sp[i] == 0)? NAN : tp[i] - sp[i] * floor(tp[i] / sp[i]);
-#else
-      /* There is no NAN for integers */
-      rp[i] = tp[i] % sp[i];
-      /* non-zero remainder whose sign differs from the divisor's */
-      if (rp[i] != 0 && ((rp[i] < 0) != (sp[i] < 0)))
-        rp[i] += sp[i];
-#endif
-    }
-  } else {
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = (*src_data == 0)? NAN : *t_data - *src_data * floor(*t_data / *src_data););
-#else
-    /* There is no NAN for integers */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data % *src_data;
-                     if (*r__data != 0 && ((*r__data < 0) != (*src_data < 0))) *r__data += *src_data;);
-#endif
-
-  }
-}
-
-/*
- * r_ = t & src, element-wise, for integer element types; float, double and
- * half builds raise an error instead. r_ is resized like t.
- */
-void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src)
-{
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
-  return THError("cbitand is only supported for integer type tensors");
-#else
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) ||
-      !THTensor_(isContiguous)(t) ||
-      !THTensor_(isContiguous)(src) ||
-      THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data & *src_data;);
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *lhs = THTensor_(data)(t);
-    real *rhs = THTensor_(data)(src);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = lhs[k] & rhs[k];
-    }
-  }
-#endif
-}
-
-/*
- * r_ = t | src, element-wise, for integer element types; float, double and
- * half builds raise an error instead. r_ is resized like t.
- */
-void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src)
-{
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
-  return THError("cbitor is only supported for integer type tensors");
-#else
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) ||
-      !THTensor_(isContiguous)(t) ||
-      !THTensor_(isContiguous)(src) ||
-      THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data | *src_data;);
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *lhs = THTensor_(data)(t);
-    real *rhs = THTensor_(data)(src);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = lhs[k] | rhs[k];
-    }
-  }
-#endif
-}
-
-/*
- * r_ = t ^ src, element-wise, for integer element types; float, double and
- * half builds raise an error instead. r_ is resized like t.
- */
-void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src)
-{
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)
-  return THError("cbitxor is only supported for integer type tensors");
-#else
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) ||
-      !THTensor_(isContiguous)(t) ||
-      !THTensor_(isContiguous)(src) ||
-      THTensor_(nElement)(r_) != THTensor_(nElement)(src))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data ^ *src_data;);
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *lhs = THTensor_(data)(t);
-    real *rhs = THTensor_(data)(src);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = lhs[k] ^ rhs[k];
-    }
-  }
-#endif
-}
-
-/* r_ = pow(value, t), element-wise: the scalar is the base and the tensor
- * supplies the exponents. r_ is resized like t. */
-void THTensor_(tpow)(THTensor *r_, real value, THTensor *t)
-{
-  THTensor_(resizeAs)(r_, t);
-
-  if (!THTensor_(isContiguous)(r_) || !THTensor_(isContiguous)(t) || THTensor_(nElement)(r_) != THTensor_(nElement)(t))
-  {
-    /* Strided operands: generic element-by-element walk. */
-    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = pow(value, *t_data););
-  }
-  else
-  {
-    /* Dense operands: flat loop over the raw buffers. */
-    real *expo = THTensor_(data)(t);
-    real *out = THTensor_(data)(r_);
-    ptrdiff_t total = THTensor_(nElement)(t);
-    ptrdiff_t k;
-    #pragma omp parallel for if(total > TH_OMP_OVERHEAD_THRESHOLD) private(k)
-    for (k = 0; k < total; k++)
-    {
-      out[k] = pow(value, expo[k]);
-    }
-  }
-}
-
-/*
- * r_ = t + value * src1 * src2, element-wise.
- * When r_ and t are different tensors, r_ is first resized like t and
- * initialised with a copy of t; the products are then accumulated into r_.
- */
-void THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2)
-{
-  const int in_place = (r_ == t);
-
-  if (!in_place) {
-    THTensor_(resizeAs)(r_, t);
-    THTensor_(copy)(r_, t);
-  }
-
-  TH_TENSOR_APPLY3(real, r_, real, src1, real, src2,
-                   *r__data += value * *src1_data * *src2_data;);
-}
-
-
-/*
- * r_ = t + value * src1 / src2, element-wise.
- * When r_ and t are different tensors, r_ is first resized like t and
- * initialised with a copy of t; the quotients are then accumulated into r_.
- */
-void THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2)
-{
-  const int in_place = (r_ == t);
-
-  if (!in_place) {
-    THTensor_(resizeAs)(r_, t);
-    THTensor_(copy)(r_, t);
-  }
-
-  TH_TENSOR_APPLY3(real, r_, real, src1, real, src2,
-                   *r__data += value * *src1_data / *src2_data;);
-}
-/* Matrix-vector product accumulated into a vector through BLAS gemv, called
- * with alpha and beta as its scaling factors (presumably
- * r_ = beta * t + alpha * mat * vec - confirm against THBlas_(gemv)).
- *
- * mat must be 2-D, vec and t 1-D, with mat->size[1] == vec->size[0] and
- * t->size[0] == mat->size[0]; any violation raises through THError.
- * When r_ and t differ, r_ is resized like t and initialised with a copy.
- * The matrix layout picks the gemv call: unit stride in dimension 0 uses
- * 'n', unit stride in dimension 1 uses 't' with swapped sizes, and any
- * other layout goes through a temporary contiguous copy of mat.
- */
-void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat, THTensor *vec)
-{
- if( (mat->nDimension != 2) || (vec->nDimension != 1) )
- THError("matrix and vector expected, got %dD, %dD",
- mat->nDimension, vec->nDimension);
-
- if( mat->size[1] != vec->size[0] ) {
- THDescBuff bm = THTensor_(sizeDesc)(mat);
- THDescBuff bv = THTensor_(sizeDesc)(vec);
- THError("size mismatch, %s, %s", bm.str, bv.str);
- }
-
- if(t->nDimension != 1)
- THError("vector expected, got t: %dD", t->nDimension);
-
- if(t->size[0] != mat->size[0]) {
- THDescBuff bt = THTensor_(sizeDesc)(t);
- THDescBuff bm = THTensor_(sizeDesc)(mat);
- THError("size mismatch, t: %s, mat: %s", bt.str, bm.str);
- }
-
- if(r_ != t)
- {
- THTensor_(resizeAs)(r_, t);
- THTensor_(copy)(r_, t);
- }
-
- if(mat->stride[0] == 1) /* unit stride along dimension 0 */
- {
- THBlas_(gemv)('n', mat->size[0], mat->size[1],
- alpha, THTensor_(data)(mat), mat->stride[1],
- THTensor_(data)(vec), vec->stride[0],
- beta, THTensor_(data)(r_), r_->stride[0]);
- }
- else if(mat->stride[1] == 1) /* unit stride along dimension 1 */
- {
- THBlas_(gemv)('t', mat->size[1], mat->size[0],
- alpha, THTensor_(data)(mat), mat->stride[0],
- THTensor_(data)(vec), vec->stride[0],
- beta, THTensor_(data)(r_), r_->stride[0]);
- }
- else
- {
- /* neither dimension has unit stride: multiply from a contiguous copy */
- THTensor *cmat = THTensor_(newContiguous)(mat);
-
- THBlas_(gemv)('t', mat->size[1], mat->size[0],
- alpha, THTensor_(data)(cmat), cmat->stride[0],
- THTensor_(data)(vec), vec->stride[0],
- beta, THTensor_(data)(r_), r_->stride[0]);
-
- THTensor_(free)(cmat);
- }
-}
-
-/*
- * Pairwise squared distances between the rows of m1 and m2, scaled by gain:
- *   r_[i][j] = gain * sum_k (m1[i][k] - m2[j][k])^2
- *
- * r_:   output, resized to N1 x N2 (N1 = m1->size[0], N2 = m2->size[0]).
- * m1:   first operand, viewed as N1 rows of nElement(m1) / N1 values.
- * m2:   second operand, viewed as N2 rows of nElement(m2) / N2 values.
- * gain: scalar applied to every sum.
- *
- * Raises an argument error when the two flattened row lengths differ. Both
- * handles obtained from newContiguous are released before that error is
- * raised, so the failure path does not leak them.
- *
- * NOTE(review): resize2d is applied to the handles returned by
- * newContiguous; whether that can be the caller's own tensor depends on
- * newContiguous - confirm.
- */
-void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain)
-{
-  long N1 = m1->size[0];
-  long N2 = m2->size[0];
-  long dim;
-  real *m1_p;
-  real *m2_p;
-  real *r_p;
-  long i;
-
-  THTensor_(resize2d)(r_, N1, N2);
-
-  m1 = THTensor_(newContiguous)(m1);
-  m2 = THTensor_(newContiguous)(m2);
-
-  /* Flatten every trailing dimension into a single row of values. */
-  THTensor_(resize2d)(m1, N1, THTensor_(nElement)(m1) / N1);
-  THTensor_(resize2d)(m2, N2, THTensor_(nElement)(m2) / N2);
-
-  dim = m1->size[1];
-  if (m1->size[1] != m2->size[1])
-  {
-    /* Drop both references first: the argument error does not return here. */
-    THTensor_(free)(m1);
-    THTensor_(free)(m2);
-    THArgCheck(0, 3, "m1 and m2 must have the same inner vector dim");
-  }
-
-  m1_p = THTensor_(data)(m1);
-  m2_p = THTensor_(data)(m2);
-  r_p = THTensor_(data)(r_);
-
-#pragma omp parallel for private(i)
-  for (i=0; i<N1; i++) {
-    long j,k;
-    for (j=0; j<N2; j++) {
-      real sum = 0;
-      for (k=0; k<dim; k++) {
-        real term = m1_p[ i*dim + k ] - m2_p[ j*dim + k ];
-        sum += term*term;
-      }
-      r_p[ i*N2 + j ] = gain * sum;
-    }
-  }
-
-  THTensor_(free)(m1);
-  THTensor_(free)(m2);
-}
-/* Matrix-matrix product accumulated into a matrix through BLAS gemm, called
- * with alpha and beta as its scaling factors (presumably
- * r_ = beta * t + alpha * m1 * m2 - confirm against THBlas_(gemm)).
- *
- * m1, m2 and t must be 2-D with m1->size[1] == m2->size[0],
- * t->size[0] == m1->size[0] and t->size[1] == m2->size[1]; any violation
- * raises through THError. When r_ and t differ, r_ is resized like t and
- * initialised with a copy.
- *
- * The three sections below pick, from each operand's strides, whether it can
- * be handed to gemm as is ('n'), as a transpose ('t'), or needs a temporary
- * copy. When r_ has unit stride in dimension 1, m1 and m2 are swapped and
- * transpose_r is 't'. Temporaries are released at the end; a temporary
- * result is copied back into r_ by freeCopyTo.
- */
-void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *m1, THTensor *m2)
-{
- char transpose_r, transpose_m1, transpose_m2;
- THTensor *r__, *m1_, *m2_;
-
- if( (m1->nDimension != 2) || (m2->nDimension != 2))
- THError("matrices expected, got %dD, %dD tensors", m1->nDimension, m2->nDimension);
-
- if(m1->size[1] != m2->size[0]) {
- THDescBuff bm1 = THTensor_(sizeDesc)(m1);
- THDescBuff bm2 = THTensor_(sizeDesc)(m2);
- THError("size mismatch, m1: %s, m2: %s", bm1.str, bm2.str);
- }
-
- if( t->nDimension != 2 )
- THError("matrix expected, got %dD tensor for t", t->nDimension);
-
- if( (t->size[0] != m1->size[0]) || (t->size[1] != m2->size[1]) ) {
- THDescBuff bt = THTensor_(sizeDesc)(t);
- THDescBuff bm1 = THTensor_(sizeDesc)(m1);
- THDescBuff bm2 = THTensor_(sizeDesc)(m2);
- THError("size mismatch, t: %s, m1: %s, m2: %s", bt.str, bm1.str, bm2.str);
- }
-
- if(t != r_)
- {
- THTensor_(resizeAs)(r_, t);
- THTensor_(copy)(r_, t);
- }
-
- /* r_ */
- if(r_->stride[0] == 1 &&
- r_->stride[1] != 0)
- {
- transpose_r = 'n';
- r__ = r_;
- }
- else if(r_->stride[1] == 1 &&
- r_->stride[0] != 0)
- {
- /* unit stride in dimension 1: swap the operands and mark the result as transposed */
- THTensor *swap = m2;
- m2 = m1;
- m1 = swap;
- transpose_r = 't';
- r__ = r_;
- }
- else
- {
- transpose_r = 'n';
-
- /* clone the transposed view, then transpose the clone back in place */
- THTensor *transp_r_ = THTensor_(newTranspose)(r_, 0, 1);
- r__ = THTensor_(newClone)(transp_r_);
- THTensor_(free)(transp_r_);
- THTensor_(transpose)(r__, NULL, 0, 1);
- }
-
- /* m1 */
- if(m1->stride[(transpose_r == 'n' ? 0 : 1)] == 1 &&
- m1->stride[(transpose_r == 'n' ? 1 : 0)] != 0)
- {
- transpose_m1 = 'n';
- m1_ = m1;
- }
- else if(m1->stride[(transpose_r == 'n' ? 1 : 0)] == 1 &&
- m1->stride[(transpose_r == 'n' ? 0 : 1)] != 0)
- {
- transpose_m1 = 't';
- m1_ = m1;
- }
- else
- {
- /* no usable unit stride: multiply from a contiguous copy */
- transpose_m1 = (transpose_r == 'n' ? 't' : 'n');
- m1_ = THTensor_(newContiguous)(m1);
- }
-
- /* m2 */
- if(m2->stride[(transpose_r == 'n' ? 0 : 1)] == 1 &&
- m2->stride[(transpose_r == 'n' ? 1 : 0)] != 0)
- {
- transpose_m2 = 'n';
- m2_ = m2;
- }
- else if(m2->stride[(transpose_r == 'n' ? 1 : 0)] == 1 &&
- m2->stride[(transpose_r == 'n' ? 0 : 1)] != 0)
- {
- transpose_m2 = 't';
- m2_ = m2;
- }
- else
- {
- /* no usable unit stride: multiply from a contiguous copy */
- transpose_m2 = (transpose_r == 'n' ? 't' : 'n');
- m2_ = THTensor_(newContiguous)(m2);
- }
-
-#pragma omp critical(blasgemm)
- /* do the operation */
- THBlas_(gemm)(transpose_m1,
- transpose_m2,
- r__->size[(transpose_r == 'n' ? 0 : 1)],
- r__->size[(transpose_r == 'n' ? 1 : 0)],
- m1_->size[(transpose_r == 'n' ? 1 : 0)],
- alpha,
- THTensor_(data)(m1_),
- (transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]),
- THTensor_(data)(m2_),
- (transpose_m2 == 'n' ? m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]),
- beta,
- THTensor_(data)(r__),
- r__->stride[(transpose_r == 'n' ? 1 : 0)]);
-
- /* free intermediate variables */
- if(m1_ != m1)
- THTensor_(free)(m1_);
-
- if(m2_ != m2)
- THTensor_(free)(m2_);
-
- if(r__ != r_)
- THTensor_(freeCopyTo)(r__, r_);
-}
-/* Outer product of two vectors accumulated into a matrix through BLAS ger
- * (presumably r_ = beta * t + alpha * vec1 (x) vec2 - confirm against
- * THBlas_(ger)).
- *
- * vec1 and vec2 must be 1-D and t 2-D with t->size[0] == vec1->size[0] and
- * t->size[1] == vec2->size[0]; any violation raises through THError.
- * When r_ and t differ, r_ is resized like t and initialised with a copy.
- * beta is applied here rather than passed to ger: r_ is zeroed when
- * beta == 0 and multiplied by beta when beta != 1. The layout of r_ picks
- * the ger call; with no unit stride in either dimension the update runs on
- * a clone that freeCopyTo copies back into r_.
- */
-void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2)
-{
- if( (vec1->nDimension != 1) || (vec2->nDimension != 1) )
- THError("vector and vector expected, got %dD, %dD tensors",
- vec1->nDimension, vec2->nDimension);
-
- if(t->nDimension != 2)
- THError("expected matrix, got %dD tensor for t", t->nDimension);
-
- if( (t->size[0] != vec1->size[0]) || (t->size[1] != vec2->size[0]) ) {
- THDescBuff bt = THTensor_(sizeDesc)(t);
- THDescBuff bv1 = THTensor_(sizeDesc)(vec1);
- THDescBuff bv2 = THTensor_(sizeDesc)(vec2);
- THError("size mismatch, t: %s, vec1: %s, vec2: %s", bt.str, bv1.str, bv2.str);
- }
-
- if(r_ != t)
- {
- THTensor_(resizeAs)(r_, t);
- THTensor_(copy)(r_, t);
- }
-
- if(beta == 0) {
- THTensor_(zero)(r_);
- }
- else if(beta != 1)
- THTensor_(mul)(r_, r_, beta);
-
- if(r_->stride[0] == 1) /* unit stride along dimension 0 */
- {
- THBlas_(ger)(vec1->size[0], vec2->size[0],
- alpha, THTensor_(data)(vec1), vec1->stride[0],
- THTensor_(data)(vec2), vec2->stride[0],
- THTensor_(data)(r_), r_->stride[1]);
- }
- else if(r_->stride[1] == 1) /* unit stride along dimension 1: swap the vectors */
- {
- THBlas_(ger)(vec2->size[0], vec1->size[0],
- alpha, THTensor_(data)(vec2), vec2->stride[0],
- THTensor_(data)(vec1), vec1->stride[0],
- THTensor_(data)(r_), r_->stride[0]);
- }
- else
- {
- /* no unit stride: update a clone, then copy it back into r_ */
- THTensor *cr = THTensor_(newClone)(r_);
-
- THBlas_(ger)(vec2->size[0], vec1->size[0],
- alpha, THTensor_(data)(vec2), vec2->stride[0],
- THTensor_(data)(vec1), vec1->stride[0],
- THTensor_(data)(cr), cr->stride[0]);
-
- THTensor_(freeCopyTo)(cr, r_);
- }
-}
-
-/* Accumulates the per-batch products of batch1 and batch2 into result by
- * calling THTensor_(addmm) once per batch. result starts as a copy of t
- * (unless t == result); beta is passed to the first addmm call only and is
- * reset to 1 afterwards so that later batches add to the running result.
- *
- * batch1 and batch2 must be 3D with the same number of batches and compatible
- * inner sizes; t must be size(batch1, 1) x size(batch2, 2). Violations are
- * reported through THArgCheck. */
-void THTensor_(addbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2)
-{
-  long batch;
-
-  THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, "expected 3D tensor");
-  THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, "expected 3D tensor");
-  /* sizes are handled as long in this function, so they are printed with %ld */
-  THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2,
-             "equal number of batches expected, got %ld, %ld",
-             (long)THTensor_(size)(batch1, 0), (long)THTensor_(size)(batch2, 0));
-  THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2,
-             "wrong matrix size, batch1: %ldx%ld, batch2: %ldx%ld",
-             (long)THTensor_(size)(batch1, 1), (long)THTensor_(size)(batch1, 2),
-             (long)THTensor_(size)(batch2, 1), (long)THTensor_(size)(batch2, 2));
-
-  long dim1 = THTensor_(size)(batch1, 1);
-  long dim2 = THTensor_(size)(batch2, 2);
-  THArgCheck(THTensor_(size)(t, 0) == dim1, 1, "output tensor of incorrect size");
-  THArgCheck(THTensor_(size)(t, 1) == dim2, 1, "output tensor of incorrect size");
-
-  if (t != result) {
-    THTensor_(resizeAs)(result, t);
-    THTensor_(copy)(result, t);
-  }
-
-  /* reusable views onto the current batch of each input */
-  THTensor *matrix1 = THTensor_(new)();
-  THTensor *matrix2 = THTensor_(new)();
-
-  for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) {
-    THTensor_(select)(matrix1, batch1, 0, batch);
-    THTensor_(select)(matrix2, batch2, 0, batch);
-
-    THTensor_(addmm)(result, beta, result, alpha, matrix1, matrix2);
-    beta = 1; // accumulate output once
-  }
-
-  THTensor_(free)(matrix1);
-  THTensor_(free)(matrix2);
-}
-
-/* Batched variant of addmm: for every batch index b, calls THTensor_(addmm)
- * on result[b] with beta, alpha, batch1[b] and batch2[b]. result starts as a
- * copy of t (unless t == result).
- *
- * batch1 and batch2 must be 3D with the same number of batches and compatible
- * inner sizes; t must be size(batch1, 0) x size(batch1, 1) x size(batch2, 2).
- * Violations are reported through THArgCheck. */
-void THTensor_(baddbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2)
-{
-  long batch;
-
-  THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(batch1));
-  THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, "expected 3D tensor, got %dD", THTensor_(nDimension)(batch2));
-  /* sizes are handled as long in this function, so they are printed with %ld */
-  THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2,
-             "equal number of batches expected, got %ld, %ld",
-             (long)THTensor_(size)(batch1, 0), (long)THTensor_(size)(batch2, 0));
-  THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2,
-             "wrong matrix size, batch1: %ldx%ld, batch2: %ldx%ld",
-             (long)THTensor_(size)(batch1, 1), (long)THTensor_(size)(batch1, 2),
-             (long)THTensor_(size)(batch2, 1), (long)THTensor_(size)(batch2, 2));
-
-  long bs = THTensor_(size)(batch1, 0);
-  long dim1 = THTensor_(size)(batch1, 1);
-  long dim2 = THTensor_(size)(batch2, 2);
-  THArgCheck(THTensor_(size)(t, 0) == bs, 1, "output tensor of incorrect size");
-  THArgCheck(THTensor_(size)(t, 1) == dim1, 1, "output tensor of incorrect size");
-  THArgCheck(THTensor_(size)(t, 2) == dim2, 1, "output tensor of incorrect size");
-
-  if (t != result) {
-    THTensor_(resizeAs)(result, t);
-    THTensor_(copy)(result, t);
-  }
-
-  /* reusable views onto the current batch of each operand */
-  THTensor *matrix1 = THTensor_(new)();
-  THTensor *matrix2 = THTensor_(new)();
-  THTensor *result_matrix = THTensor_(new)();
-
-  for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) {
-    THTensor_(select)(matrix1, batch1, 0, batch);
-    THTensor_(select)(matrix2, batch2, 0, batch);
-    THTensor_(select)(result_matrix, result, 0, batch);
-
-    THTensor_(addmm)(result_matrix, beta, result_matrix, alpha, matrix1, matrix2);
-  }
-
-  THTensor_(free)(matrix1);
-  THTensor_(free)(matrix2);
-  THTensor_(free)(result_matrix);
-}
-
-/* Alias for THTensor_(nElement): returns whatever nElement reports for t. */
-ptrdiff_t THTensor_(numel)(THTensor *t)
-{
-  ptrdiff_t count = THTensor_(nElement)(t);
-
-  return count;
-}
-
-/* Maximum of t along `dimension`.
- *
- * values_ and indices_ are resized to t's sizes with `dimension` set to 1 and
- * receive the maximum and its 0-based position along that dimension. When
- * keepdim is 0 the reduced dimension is squeezed out of both outputs on every
- * path (the former early return for a size-1 dimension skipped the squeeze).
- *
- * The comparisons are written as !(a <= b) so that a NaN input is selected;
- * once a NaN has been stored it is not replaced. */
-void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
-{
-  THLongStorage *dim;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-
-  dim = THTensor_(newSizeOf)(t);
-  THLongStorage_set(dim, dimension, 1);
-  THTensor_(resize)(values_, dim, NULL);
-  THLongTensor_resize(indices_, dim, NULL);
-  THLongStorage_free(dim);
-
-  // two implementations optimized for data locality
-  if (t->stride[dimension] == 1) {
-    real theMax;
-    real value;
-    long theIndex;
-    long i;
-    TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,
-                         theMax = t_data[0];
-                         theIndex = 0;
-
-                         for(i = 0; i < t_size; i++)
-                         {
-                           value = t_data[i*t_stride];
-                           /* This is not the same as value>theMax in the case of NaNs */
-                           if(!(value <= theMax))
-                           {
-                             theIndex = i;
-                             theMax = value;
-                             th_isnan_break(value)
-                           }
-                         }
-                         *indices__data = theIndex;
-                         *values__data = theMax;);
-  } else {
-    /* seed the outputs with the first slice along `dimension` */
-    if (THTensor_(nDimension)(t) > 1) {
-      THTensor *t0 = THTensor_(newSelect)(t, dimension, 0);
-      THTensor_(copy)(values_, t0);
-      THTensor_(free)(t0);
-    } else {
-      THTensor_(fill)(values_, THTensor_(get1d)(t, 0));
-    }
-    THLongTensor_zero(indices_);
-
-    /* a single slice is already the answer; otherwise scan the rest */
-    if(t->size[dimension] != 1) {
-      THTensor *tempValues_ = THTensor_(newWithTensor)(values_);
-      // tempValues_.expand_as(t)
-      tempValues_->size[dimension] = t->size[dimension];
-      tempValues_->stride[dimension] = 0;
-
-      THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_);
-      // tempIndices_.expand_as(t)
-      tempIndices_->size[dimension] = t->size[dimension];
-      tempIndices_->stride[dimension] = 0;
-
-      TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension,
-                         if(!(*t_data <= *tempValues__data) && !th_isnan(*tempValues__data)) {
-                           *tempValues__data = *t_data;
-                           *tempIndices__data = *tempIndices__dimOffset;
-                         });
-
-      THTensor_(free)(tempValues_);
-      THLongTensor_free(tempIndices_);
-    }
-  }
-
-  if (!keepdim) {
-    THTensor_(squeeze1d)(values_, values_, dimension);
-    THLongTensor_squeeze1d(indices_, indices_, dimension);
-  }
-}
-
-/* Minimum of t along `dimension`.
- *
- * values_ and indices_ are resized to t's sizes with `dimension` set to 1 and
- * receive the minimum and its 0-based position along that dimension. When
- * keepdim is 0 the reduced dimension is squeezed out of both outputs on every
- * path (the former early return for a size-1 dimension skipped the squeeze).
- * The expanded helper views used by the strided path are released before
- * returning (they used to be leaked).
- *
- * The comparisons are written as !(a >= b) so that a NaN input is selected;
- * once a NaN has been stored it is not replaced. */
-void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
-{
-  THLongStorage *dim;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-
-  dim = THTensor_(newSizeOf)(t);
-  THLongStorage_set(dim, dimension, 1);
-  THTensor_(resize)(values_, dim, NULL);
-  THLongTensor_resize(indices_, dim, NULL);
-  THLongStorage_free(dim);
-
-  // two implementations optimized for data locality
-  if (t->stride[dimension] == 1) {
-    real theMin;
-    real value;
-    long theIndex;
-    long i;
-    TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,
-                         theMin = t_data[0];
-                         theIndex = 0;
-
-                         for(i = 0; i < t_size; i++)
-                         {
-                           value = t_data[i*t_stride];
-                           /* This is not the same as value<theMin in the case of NaNs */
-                           if(!(value >= theMin))
-                           {
-                             theIndex = i;
-                             theMin = value;
-                             th_isnan_break(value)
-                           }
-                         }
-                         *indices__data = theIndex;
-                         *values__data = theMin;);
-  } else {
-    /* seed the outputs with the first slice along `dimension` */
-    if (THTensor_(nDimension)(t) > 1) {
-      THTensor *t0 = THTensor_(newSelect)(t, dimension, 0);
-      THTensor_(copy)(values_, t0);
-      THTensor_(free)(t0);
-    } else {
-      THTensor_(fill)(values_, THTensor_(get1d)(t, 0));
-    }
-    THLongTensor_zero(indices_);
-
-    /* a single slice is already the answer; otherwise scan the rest */
-    if(t->size[dimension] != 1) {
-      THTensor *tempValues_ = THTensor_(newWithTensor)(values_);
-      // tempValues_.expand_as(t)
-      tempValues_->size[dimension] = t->size[dimension];
-      tempValues_->stride[dimension] = 0;
-
-      THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_);
-      // tempIndices_.expand_as(t)
-      tempIndices_->size[dimension] = t->size[dimension];
-      tempIndices_->stride[dimension] = 0;
-
-      TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension,
-                         if(!(*t_data >= *tempValues__data) && !th_isnan(*tempValues__data)) {
-                           *tempValues__data = *t_data;
-                           *tempIndices__data = *tempIndices__dimOffset;
-                         });
-
-      THTensor_(free)(tempValues_);
-      THLongTensor_free(tempIndices_);
-    }
-  }
-
-  if (!keepdim) {
-    THTensor_(squeeze1d)(values_, values_, dimension);
-    THLongTensor_squeeze1d(indices_, indices_, dimension);
-  }
-}
-
-
-/* Sums t along `dimension` into r_. r_ is resized to t's sizes with
- * `dimension` set to 1; that dimension is squeezed out when keepdim is 0. */
-void THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim)
-{
-  THLongStorage *outSize;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-
-  outSize = THTensor_(newSizeOf)(t);
-  THLongStorage_set(outSize, dimension, 1);
-  THTensor_(resize)(r_, outSize, NULL);
-  THLongStorage_free(outSize);
-
-  /* one code path per memory layout */
-  if (t->stride[dimension] != 1) {
-    /* accumulate into r_ through a view whose reduced dimension has stride 0 */
-    THTensor *temp_;
-
-    THTensor_(zero)(r_);
-    temp_ = THTensor_(newWithTensor)(r_);
-    temp_->size[dimension] = t->size[dimension];
-    temp_->stride[dimension] = 0;
-
-    TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data += *t_data;);
-    THTensor_(free)(temp_);
-  } else {
-    /* unit stride along `dimension`: reduce each slice in an accreal accumulator */
-    TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
-                         accreal total = 0;
-                         long pos;
-                         for(pos = 0; pos < t_size; pos++)
-                           total += t_data[pos*t_stride];
-                         *r__data = (real)total;);
-  }
-
-  if (!keepdim)
-    THTensor_(squeeze1d)(r_, r_, dimension);
-}
-
-/* Multiplies the elements of t along `dimension` into r_. r_ is resized to t's
- * sizes with `dimension` set to 1; that dimension is squeezed out when keepdim
- * is 0. */
-void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim)
-{
-  THLongStorage *outSize;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-
-  outSize = THTensor_(newSizeOf)(t);
-  THLongStorage_set(outSize, dimension, 1);
-  THTensor_(resize)(r_, outSize, NULL);
-  THLongStorage_free(outSize);
-
-  /* one code path per memory layout */
-  if (t->stride[dimension] != 1) {
-    /* accumulate into r_ through a view whose reduced dimension has stride 0 */
-    THTensor *temp_;
-
-    THTensor_(fill)(r_, 1);
-    temp_ = THTensor_(newWithTensor)(r_);
-    temp_->size[dimension] = t->size[dimension];
-    temp_->stride[dimension] = 0;
-
-    TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data *= *t_data;);
-    THTensor_(free)(temp_);
-  } else {
-    /* unit stride along `dimension`: reduce each slice in an accreal accumulator */
-    TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
-                         accreal product = 1;
-                         long pos;
-                         for(pos = 0; pos < t_size; pos++)
-                           product *= t_data[pos*t_stride];
-                         *r__data = (real)product;);
-  }
-
-  if (!keepdim)
-    THTensor_(squeeze1d)(r_, r_, dimension);
-}
-
-/* Running sum of t along `dimension`; r_ is resized like t and element i of
- * each slice receives the sum of elements 0..i, accumulated in accreal. */
-void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension)
-{
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-
-  THTensor_(resizeAs)(r_, t);
-
-  TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
-                       accreal running = 0;
-                       long pos = 0;
-                       while(pos < t_size)
-                       {
-                         running += t_data[pos*t_stride];
-                         r__data[pos*r__stride] = (real)running;
-                         pos++;
-                       });
-}
-
-/* Running product of t along `dimension`; r_ is resized like t and element i
- * of each slice receives the product of elements 0..i, accumulated in accreal. */
-void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension)
-{
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-
-  THTensor_(resizeAs)(r_, t);
-
-  TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
-                       accreal running = 1;
-                       long pos = 0;
-                       while(pos < t_size)
-                       {
-                         running *= t_data[pos*t_stride];
-                         r__data[pos*r__stride] = (real)running;
-                         pos++;
-                       });
-}
-
-
-/* Element-wise sign of t written into r_ (resized like t): 1 for positive
- * values, -1 for negative values and 0 otherwise. The byte build has no
- * negative case and only produces 1 or 0. */
-void THTensor_(sign)(THTensor *r_, THTensor *t)
-{
-  THTensor_(resizeAs)(r_, t);
-
-#if defined (TH_REAL_IS_BYTE)
-  TH_TENSOR_APPLY2(real, r_, real, t,
-                   *r__data = (*t_data > 0) ? 1 : 0;);
-#else
-  TH_TENSOR_APPLY2(real, r_, real, t,
-                   *r__data = (*t_data > 0) ? 1 : ((*t_data < 0) ? -1 : 0););
-#endif
-}
-
-
-/* Returns the sum of the main-diagonal elements of the 2D tensor t,
- * accumulated in accreal. A non-2D tensor is rejected through THArgCheck. */
-accreal THTensor_(trace)(THTensor *t)
-{
-  real *elems = THTensor_(data)(t);
-  accreal total = 0;
-  long diagStep, diagLen, pos;
-
-  THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix");
-
-  /* moving one step down the diagonal advances by both strides at once */
-  diagStep = THTensor_(stride)(t, 0) + THTensor_(stride)(t, 1);
-  diagLen = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1));
-
-  for(pos = 0; pos < diagLen; pos++)
-    total += elems[pos*diagStep];
-
-  return total;
-}
-
-/* Cross product of a and b along `dimension`, written into r_ (resized like a).
- * a and b must have identical sizes. A negative `dimension` selects the first
- * dimension of a whose size is 3; the chosen dimension must have size 3.
- * Violations are reported through THError / THArgCheck. */
-void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension)
-{
-  int d;
-
-  if(THTensor_(nDimension)(a) != THTensor_(nDimension)(b))
-    THError("inconsistent tensor dimension %dD, %dD",
-        THTensor_(nDimension)(a), THTensor_(nDimension)(b));
-
-  for(d = 0; d < THTensor_(nDimension)(a); d++)
-  {
-    if(THTensor_(size)(a, d) == THTensor_(size)(b, d))
-      continue;
-
-    {
-      THDescBuff ba = THTensor_(sizeDesc)(a);
-      THDescBuff bb = THTensor_(sizeDesc)(b);
-      THError("inconsistent tensor sizes %s, %s", ba.str, bb.str);
-    }
-  }
-
-  if(dimension < 0)
-  {
-    /* pick the first dimension of size 3; the loop stops once one is found */
-    for(d = 0; dimension < 0 && d < THTensor_(nDimension)(a); d++)
-    {
-      if(THTensor_(size)(a, d) == 3)
-        dimension = d;
-    }
-
-    if(dimension < 0) {
-      THDescBuff ba = THTensor_(sizeDesc)(a);
-      THError("no dimension of size 3 in a: %s", ba.str);
-    }
-  }
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(a), 3, "dimension %d out of range",
-      dimension + TH_INDEX_BASE);
-  THArgCheck(THTensor_(size)(a, dimension) == 3, 3, "dimension %d does not have size 3",
-      dimension + TH_INDEX_BASE);
-
-  THTensor_(resizeAs)(r_, a);
-
-  /* components 0, 1, 2 of each slice sit at offsets 0, stride and 2*stride */
-  TH_TENSOR_DIM_APPLY3(real, a, real, b, real, r_, dimension,
-                       r__data[0] = a_data[a_stride]*b_data[2*b_stride] - a_data[2*a_stride]*b_data[b_stride];
-                       r__data[r__stride] = a_data[2*a_stride]*b_data[0] - a_data[0]*b_data[2*b_stride];
-                       r__data[2*r__stride] = a_data[0]*b_data[b_stride] - a_data[a_stride]*b_data[0];);
-}
-
-/* Element-wise maximum: r (resized like t) receives the t element when it is
- * strictly greater than the src element, and the src element otherwise. */
-void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r, t);
-  TH_TENSOR_APPLY3(real, r, real, t, real, src,
-                   if (*t_data > *src_data)
-                     *r_data = *t_data;
-                   else
-                     *r_data = *src_data;);
-}
-
-/* Element-wise minimum: r (resized like t) receives the t element when it is
- * strictly smaller than the src element, and the src element otherwise. */
-void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src)
-{
-  THTensor_(resizeAs)(r, t);
-  TH_TENSOR_APPLY3(real, r, real, t, real, src,
-                   if (*t_data < *src_data)
-                     *r_data = *t_data;
-                   else
-                     *r_data = *src_data;);
-}
-
-/* Clamps from below: r (resized like t) receives the t element when it is
- * strictly greater than `value`, and `value` otherwise. */
-void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r, t);
-  TH_TENSOR_APPLY2(real, r, real, t,
-                   if (*t_data > value)
-                     *r_data = *t_data;
-                   else
-                     *r_data = value;);
-}
-
-/* Clamps from above: r (resized like t) receives the t element when it is
- * strictly smaller than `value`, and `value` otherwise. */
-void THTensor_(cminValue)(THTensor *r, THTensor *t, real value)
-{
-  THTensor_(resizeAs)(r, t);
-  TH_TENSOR_APPLY2(real, r, real, t,
-                   if (*t_data < value)
-                     *r_data = *t_data;
-                   else
-                     *r_data = value;);
-}
-
-/* Resizes r_ to `size` and then clears it with THTensor_(zero). */
-void THTensor_(zeros)(THTensor *r_, THLongStorage *size)
-{
-  THTensor_(resize)(r_, size, NULL); /* NULL: no explicit strides are supplied */
-  THTensor_(zero)(r_);
-}
-
-/* Resizes r_ to `size` and then fills it with the value 1. */
-void THTensor_(ones)(THTensor *r_, THLongStorage *size)
-{
-  THTensor_(resize)(r_, size, NULL); /* NULL: no explicit strides are supplied */
-  THTensor_(fill)(r_, 1);
-}
-
-/* Diagonal helper with two modes, selected by the rank of t:
- *  - 1D input: r_ becomes a zeroed square matrix of side size(t) + |k| with
- *    t written on its k-th diagonal (k > 0 moves along dimension 1, k < 0
- *    along dimension 0);
- *  - 2D input: r_ becomes a vector holding the k-th diagonal of t.
- * Any other rank is rejected through THArgCheck. */
-void THTensor_(diag)(THTensor *r_, THTensor *t, int k)
-{
-  THArgCheck(THTensor_(nDimension)(t) == 1 || THTensor_(nDimension)(t) == 2, 1, "matrix or a vector expected");
-
-  if(THTensor_(nDimension)(t) != 1)
-  {
-    /* matrix -> vector: extract the k-th diagonal */
-    real *src = THTensor_(data)(t);
-    long srcStride0 = THTensor_(stride)(t, 0);
-    long srcStride1 = THTensor_(stride)(t, 1);
-    long len;
-    real *dst;
-    long dstStride;
-    long pos;
-
-    if(k >= 0)
-      len = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)-k);
-    else
-      len = THMin(THTensor_(size)(t, 0)+k, THTensor_(size)(t, 1));
-    THTensor_(resize1d)(r_, len);
-    dst = THTensor_(data)(r_);
-    dstStride = THTensor_(stride)(r_, 0);
-
-    /* jump to the first element of the requested diagonal */
-    src += (k >= 0 ? k*srcStride1 : -k*srcStride0);
-    for(pos = 0; pos < len; pos++)
-      dst[pos*dstStride] = src[pos*(srcStride0+srcStride1)];
-  }
-  else
-  {
-    /* vector -> matrix: place t on the k-th diagonal of a zeroed square */
-    real *src = THTensor_(data)(t);
-    long srcStride = THTensor_(stride)(t, 0);
-    long len = THTensor_(size)(t, 0);
-    long side = len + (k >= 0 ? k : -k);
-    real *dst;
-    long dstStride0;
-    long dstStride1;
-    long pos;
-
-    THTensor_(resize2d)(r_, side, side);
-    THTensor_(zero)(r_);
-    dst = THTensor_(data)(r_);
-    dstStride0 = THTensor_(stride)(r_, 0);
-    dstStride1 = THTensor_(stride)(r_, 1);
-    /* jump to the first element of the requested diagonal */
-    dst += (k >= 0 ? k*dstStride1 : -k*dstStride0);
-
-    for(pos = 0; pos < len; pos++)
-      dst[pos*(dstStride0+dstStride1)] = src[pos*srcStride];
-  }
-}
-
-/* Resizes r_ to n x m, clears it and writes 1 on the main diagonal.
- * n must be strictly positive; m <= 0 means "use n" (square result). */
-void THTensor_(eye)(THTensor *r_, long n, long m)
-{
-  real *elems;
-  long diagLen;
-  long pos;
-
-  THArgCheck(n > 0, 1, "invalid argument");
-
-  if(m <= 0)
-    m = n;
-
-  THTensor_(resize2d)(r_, n, m);
-  THTensor_(zero)(r_);
-
-  elems = THTensor_(data)(r_);
-  diagLen = THMin(THTensor_(size)(r_, 0), THTensor_(size)(r_, 1));
-  /* one step down the diagonal advances by both strides at once */
-  for(pos = 0; pos < diagLen; pos++)
-    elems[pos*(r_->stride[0]+r_->stride[1])] = 1;
-}
-
-
-/* Fills r_ with xmin, xmin + step, xmin + 2*step, ... for
- * floor((xmax - xmin) / step) + 1 elements; r_ is resized to a 1D tensor of
- * that length when its element count differs.
- *
- * step must be non-zero and its sign must agree with the direction from xmin
- * to xmax; violations are reported through THArgCheck.
- *
- * The element counter is kept in accreal rather than real so that it does not
- * wrap (narrow integer types) or stop advancing (float) on long ranges. */
-void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step)
-{
-  ptrdiff_t size;
-  accreal i = 0;
-
-  THArgCheck(step > 0 || step < 0, 3, "step must be a non-null number");
-  THArgCheck(((step > 0) && (xmax >= xmin)) || ((step < 0) && (xmax <= xmin))
-              , 2, "upper bound and larger bound incoherent with step sign");
-
-  size = (ptrdiff_t) (((xmax - xmin) / step) + 1);
-
-  if (THTensor_(nElement)(r_) != size) {
-    THTensor_(resize1d)(r_, size);
-  }
-
-  TH_TENSOR_APPLY(real, r_, *r__data = xmin + (i++)*step;);
-}
-
-/* Half-open variant of THTensor_(range): when (xmax - xmin) is an exact
- * multiple of step, xmax is pulled back by one step so that it is excluded,
- * then THTensor_(range) does the filling.
- *
- * step is validated here first because the integer build uses it as the
- * divisor of a % operation, which must never see zero. */
-void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step) {
-  int m;
-
-  THArgCheck(step > 0 || step < 0, 3, "step must be a non-null number");
-
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-  m = fmod(xmax - xmin,step) == 0;
-#else
-  m = (xmax - xmin) % step == 0;
-#endif
-  if (m)
-    xmax -= step;
-  THTensor_(range)(r_,xmin,xmax,step);
-}
-
-/* Fills r_ (resized to n elements) with 0..n-1 and then shuffles it: position
- * i is exchanged with a position drawn from [i, n) using THRandom_random.
- * n must be strictly positive. */
-void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n)
-{
-  real *elems;
-  long step;
-  long pos;
-
-  THArgCheck(n > 0, 1, "must be strictly positive");
-
-  THTensor_(resize1d)(r_, n);
-  elems = THTensor_(data)(r_);
-  step = THTensor_(stride)(r_,0);
-
-  /* identity permutation */
-  for(pos = 0; pos < n; pos++)
-    elems[pos*step] = (real)(pos);
-
-  /* exchange each position with one picked from the not-yet-fixed tail */
-  for(pos = 0; pos < n-1; pos++)
-  {
-    long pick = THRandom_random(_generator) % (n-pos);
-    real held = elems[pos*step];
-
-    elems[pos*step] = elems[(pick+pos)*step];
-    elems[(pick+pos)*step] = held;
-  }
-}
-
-/* Resizes r_ to `size` and copies the contents of t into it. */
-void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size)
-{
-  THTensor_(resize)(r_, size, NULL); /* NULL: no explicit strides are supplied */
-  THTensor_(copy)(r_, t);
-}
-
-/* I cut and pasted (slightly adapted) the quicksort code from
- Sedgewick's 1978 "Implementing Quicksort Programs" article
- http://www.csie.ntu.edu.tw/~b93076/p847-sedgewick.pdf
-
- It is the state-of-the-art existing implementation. The macros
- are here to make as close a match as possible to the pseudocode of
- Program 2 p.851
-
- Note that other partition schemes exist, and are typically presented
- in textbooks, but those are less efficient. See e.g.
- http://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto
-
- Julien, November 12th 2013
-*/
-#define MAX_LEVELS 300 /* capacity of the beg[]/end[] subfile stacks in the quicksorts below */
-#define M_SMALL 10 /* Limit for small subfiles */
-
-#define ARR(III) arr[(III)*stride] /* III-th value; expects locals named arr and stride */
-#define IDX(III) idx[(III)*stride] /* III-th index; expects locals named idx and stride */
-
-#define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap /* exchanges two longs through a local named swap */
-#define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap /* exchanges two reals through a local named rswap */
-
-#define ARR_SWAP(III, JJJ) \
- REAL_SWAP(ARR(III), ARR(JJJ)); /* values only; note the expansion already ends with ';' */
-
-#define BOTH_SWAP(III, JJJ) \
- REAL_SWAP(ARR(III), ARR(JJJ)); \
- LONG_SWAP(IDX(III), IDX(JJJ)) /* values and indices together; several statements, so keep uses inside braces */
-
-static void THTensor_(quicksortascend)(real *arr, long *idx, long elements, long stride)
-{ /* Sorts the `elements` values arr[0], arr[stride], ... into ascending order
-   * in place and applies the same moves to idx (same stride).
-   * Partitioning is iterative with an explicit stack and stops refining a
-   * subfile once it has at most M_SMALL elements; the insertion-sort pass at
-   * the end finishes the ordering over the whole array. */
- long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left;
- real rswap, piv;
- unsigned char done = 0;
-
- /* beg[0]=0; end[0]=elements; */
- stack = 0;
- L = 0; R = elements-1;
- done = elements-1 <= M_SMALL; /* small inputs skip partitioning entirely */
-
- while(!done) {
- /* Use median of three for pivot choice */
- P=(L+R)>>1;
- BOTH_SWAP(P, L+1);
- if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }
- if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }
- if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }
-
- i = L+1; j = R; piv = ARR(L); pid = IDX(L); /* pid is not read before it is reassigned in the insertion sort */
-
- do {
- do { i = i+1; } while(ARR(i) < piv); /* no bounds test here: presumably relies on the median-of-three ordering above as sentinels */
- do { j = j-1; } while(ARR(j) > piv);
- if (j < i)
- break;
- BOTH_SWAP(i, j);
- } while(1);
- BOTH_SWAP(L, j); /* put the pivot between the two subfiles */
- /* Left subfile is (L, j-1) */
- /* Right subfile is (i, R) */
- sz_left = j-L;
- sz_right = R-i+1;
- if (sz_left <= M_SMALL && sz_right <= M_SMALL) {
- /* both subfiles are small */
- /* if stack empty */
- if (stack == 0) {
- done = 1;
- } else {
- stack--;
- L = beg[stack];
- R = end[stack];
- }
- } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) {
- /* exactly one of the subfiles is small */
- /* (L,R) = large subfile */
- if (sz_left > sz_right) {
- /* Implicit: L = L; */
- R = j-1;
- } else {
- L = i;
- /* Implicit: R = R; */
- }
- } else {
- /* none of the subfiles is small */
- /* push large subfile */
- /* (L,R) = small subfile */
- if (sz_left > sz_right) {
- beg[stack] = L;
- end[stack] = j-1;
- stack++; /* NOTE(review): not checked against MAX_LEVELS */
- L = i;
- /* Implicit: R = R */
- } else {
- beg[stack] = i;
- end[stack] = R;
- stack++; /* NOTE(review): not checked against MAX_LEVELS */
- /* Implicit: L = L; */
- R = j-1;
- }
- }
- } /* while not done */
- /* Now insertion sort on the concatenation of subfiles */
- for(i=elements-2; i>=0; i--) { /* walks right to left, so everything after i is already ordered */
- if (ARR(i) > ARR(i+1)) {
- piv = ARR(i);
- pid = IDX(i);
- j = i+1;
- do { /* shift smaller elements one slot left until piv's place is found */
- ARR(j-1) = ARR(j);
- IDX(j-1) = IDX(j);
- j = j+1;
- } while(j < elements && ARR(j) < piv);
- ARR(j-1) = piv;
- IDX(j-1) = pid;
- }
- }
-}
-
-static void THTensor_(quicksortdescend)(real *arr, long *idx, long elements, long stride)
-{ /* Sorts the `elements` values arr[0], arr[stride], ... into descending order
-   * in place and applies the same moves to idx (same stride).
-   * Same structure as the ascending variant with every value comparison
-   * reversed: iterative partitioning with an explicit stack down to subfiles
-   * of at most M_SMALL elements, then one insertion-sort pass. */
- long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left;
- real rswap, piv;
- unsigned char done = 0;
-
- /* beg[0]=0; end[0]=elements; */
- stack = 0;
- L = 0; R = elements-1;
- done = elements-1 <= M_SMALL; /* small inputs skip partitioning entirely */
-
- while(!done) {
- /* Use median of three for pivot choice */
- P=(L+R)>>1;
- BOTH_SWAP(P, L+1);
- if (ARR(L+1) < ARR(R)) { BOTH_SWAP(L+1, R); }
- if (ARR(L) < ARR(R)) { BOTH_SWAP(L, R); }
- if (ARR(L+1) < ARR(L)) { BOTH_SWAP(L+1, L); }
-
- i = L+1; j = R; piv = ARR(L); pid = IDX(L); /* pid is not read before it is reassigned in the insertion sort */
-
- do {
- do { i = i+1; } while(ARR(i) > piv); /* no bounds test here: presumably relies on the median-of-three ordering above as sentinels */
- do { j = j-1; } while(ARR(j) < piv);
- if (j < i)
- break;
- BOTH_SWAP(i, j);
- } while(1);
- BOTH_SWAP(L, j); /* put the pivot between the two subfiles */
- /* Left subfile is (L, j-1) */
- /* Right subfile is (i, R) */
- sz_left = j-L;
- sz_right = R-i+1;
- if (sz_left <= M_SMALL && sz_right <= M_SMALL) {
- /* both subfiles are small */
- /* if stack empty */
- if (stack == 0) {
- done = 1;
- } else {
- stack--;
- L = beg[stack];
- R = end[stack];
- }
- } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) {
- /* exactly one of the subfiles is small */
- /* (L,R) = large subfile */
- if (sz_left > sz_right) {
- /* Implicit: L = L; */
- R = j-1;
- } else {
- L = i;
- /* Implicit: R = R; */
- }
- } else {
- /* none of the subfiles is small */
- /* push large subfile */
- /* (L,R) = small subfile */
- if (sz_left > sz_right) {
- beg[stack] = L;
- end[stack] = j-1;
- stack++; /* NOTE(review): not checked against MAX_LEVELS */
- L = i;
- /* Implicit: R = R */
- } else {
- beg[stack] = i;
- end[stack] = R;
- stack++; /* NOTE(review): not checked against MAX_LEVELS */
- /* Implicit: L = L; */
- R = j-1;
- }
- }
- } /* while not done */
- /* Now insertion sort on the concatenation of subfiles */
- for(i=elements-2; i>=0; i--) { /* walks right to left, so everything after i is already ordered */
- if (ARR(i) < ARR(i+1)) {
- piv = ARR(i);
- pid = IDX(i);
- j = i+1;
- do { /* shift larger elements one slot left until piv's place is found */
- ARR(j-1) = ARR(j);
- IDX(j-1) = IDX(j);
- j = j+1;
- } while(j < elements && ARR(j) > piv);
- ARR(j-1) = piv;
- IDX(j-1) = pid;
- }
- }
-}
-
-#undef MAX_LEVELS /* used by the two quicksort routines above */
-#undef M_SMALL /* used by the two quicksort routines above */
-
-/* Sorts t along `dimension`. rt_ receives a sorted copy of t and ri_ (resized
- * to t's sizes) receives, for every sorted element, its 0-based position in
- * the original slice. A non-zero descendingOrder selects descending order. */
-void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder)
-{
-  THLongStorage *fullSize;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "invalid dimension %d",
-      dimension + TH_INDEX_BASE);
-
-  /* the sort runs in place on a copy of the input */
-  THTensor_(resizeAs)(rt_, t);
-  THTensor_(copy)(rt_, t);
-
-  fullSize = THTensor_(newSizeOf)(t);
-  THLongTensor_resize(ri_, fullSize, NULL);
-  THLongStorage_free(fullSize);
-
-  /* per slice: number the positions, then sort values and positions together */
-  TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension,
-                       long pos;
-                       for(pos = 0; pos < ri__size; pos++)
-                         ri__data[pos*ri__stride] = pos;
-                       if(descendingOrder)
-                         THTensor_(quicksortdescend)(rt__data, ri__data, rt__size, rt__stride);
-                       else
-                         THTensor_(quicksortascend)(rt__data, ri__data, rt__size, rt__stride);)
-}
-
-/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's
-public domain implementation at http://ndevilla.free.fr/median/median/
-Adapted similarly to the above Quicksort algorithm.
-This version does not produce indices along with values. */
-static void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride)
-{ /* Quickselect over the `elements` values arr[0], arr[stride], ...:
-   * repeatedly partitions the active range [L, R] around a median-of-three
-   * pivot and keeps only the side containing position k, until the range
-   * holds at most two elements. Values are reordered in place; no index
-   * array is maintained. */
- long P, L, R, i, j, swap; /* swap is unused here: ARR_SWAP only needs rswap */
- real rswap, piv;
- L = 0;
- R = elements-1;
-
- do {
- if (R <= L) /* One element only */
- return;
-
- if (R == L+1) { /* Two elements only */
- if (ARR(L) > ARR(R)) {
- ARR_SWAP(L, R);
- }
- return;
- }
-
- /* Use median of three for pivot choice */
- P=(L+R)>>1;
- ARR_SWAP(P, L+1);
- if (ARR(L+1) > ARR(R)) { ARR_SWAP(L+1, R); }
- if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); }
- if (ARR(L+1) > ARR(L)) { ARR_SWAP(L+1, L); }
-
- i = L+1;
- j = R;
- piv = ARR(L);
- do {
- do i++; while(ARR(i) < piv); /* no bounds test: presumably relies on the ordering set up above */
- do j--; while(ARR(j) > piv);
- if (j < i)
- break;
- ARR_SWAP(i, j);
- } while(1);
- ARR_SWAP(L, j); /* pivot lands at position j */
-
- /* Re-set active partition */
- if (j <= k) L=i; /* both tests fire when j == k, leaving R < L so the next pass returns */
- if (j >= k) R=j-1;
- } while(1);
-}
-
-/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's
-public domain implementation at http://ndevilla.free.fr/median/median/
-Adapted similarly to the above Quicksort algorithm. */
-static void THTensor_(quickselect)(real *arr, long *idx, long k, long elements, long stride)
-{ /* Quickselect over the `elements` values arr[0], arr[stride], ...:
-   * repeatedly partitions the active range [L, R] around a median-of-three
-   * pivot and keeps only the side containing position k, until the range
-   * holds at most two elements. Every move applied to arr is mirrored on idx.
-   * Callers in this file read arr[k] / idx[k] afterwards as the element of
-   * rank k (0-based, ascending). */
- long P, L, R, i, j, swap, pid;
- real rswap, piv;
- L = 0;
- R = elements-1;
-
- do {
- if (R <= L) /* One element only */
- return;
-
- if (R == L+1) { /* Two elements only */
- if (ARR(L) > ARR(R)) {
- BOTH_SWAP(L, R);
- }
- return;
- }
-
- /* Use median of three for pivot choice */
- P=(L+R)>>1;
- BOTH_SWAP(P, L+1);
- if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }
- if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }
- if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }
-
- i = L+1;
- j = R;
- piv = ARR(L);
- pid = IDX(L); /* assigned but never read in this function */
- do {
- do i++; while(ARR(i) < piv); /* no bounds test: presumably relies on the ordering set up above */
- do j--; while(ARR(j) > piv);
- if (j < i)
- break;
- BOTH_SWAP(i, j);
- } while(1);
- BOTH_SWAP(L, j); /* pivot lands at position j */
-
- /* Re-set active partition */
- if (j <= k) L=i; /* both tests fire when j == k, leaving R < L so the next pass returns */
- if (j >= k) R=j-1;
- } while(1);
-}
-
-/* Drop every helper macro introduced for the sort/select routines above,
- * including ARR_SWAP, which was previously left defined. */
-#undef ARR
-#undef IDX
-#undef LONG_SWAP
-#undef REAL_SWAP
-#undef ARR_SWAP
-#undef BOTH_SWAP
-
-/* Most frequent value of t along `dimension`.
- *
- * values_ and indices_ are resized to t's sizes with `dimension` set to 1.
- * Each slice is copied into a scratch buffer and sorted in ascending order
- * together with its original positions; the longest run of equal values wins
- * (the first such run in sorted order on ties). indices_ receives the original
- * position carried by the last element of the winning run. The reduced
- * dimension is squeezed out when keepdim is 0. */
-void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
-{
-  THLongStorage *outSize;
-  THTensor *scratch;
-  THLongTensor *scratchIdx;
-  real *vals;
-  long *order;
-  long sliceLen;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range");
-
-  outSize = THTensor_(newSizeOf)(t);
-  THLongStorage_set(outSize, dimension, 1);
-  THTensor_(resize)(values_, outSize, NULL);
-  THLongTensor_resize(indices_, outSize, NULL);
-  THLongStorage_free(outSize);
-
-  sliceLen = THTensor_(size)(t, dimension);
-
-  /* contiguous scratch buffers reused for every slice */
-  scratch = THTensor_(new)();
-  THTensor_(resize1d)(scratch, sliceLen);
-  vals = THTensor_(data)(scratch);
-
-  scratchIdx = THLongTensor_new();
-  THLongTensor_resize1d(scratchIdx, sliceLen);
-  order = THLongTensor_data(scratchIdx);
-
-  TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,
-                       long pos;
-                       real best = 0;
-                       long bestPos = 0;
-                       long run = 0;
-                       long bestRun = 0;
-                       for(pos = 0; pos < sliceLen; pos++)
-                       {
-                         vals[pos] = t_data[pos*t_stride];
-                         order[pos] = pos;
-                       }
-                       THTensor_(quicksortascend)(vals, order, sliceLen, 1);
-
-                       for(pos = 0; pos < sliceLen; pos++)
-                       {
-                         run++;
-                         /* a run ends at the last element or where the value changes */
-                         if ((pos != sliceLen - 1) && (vals[pos] == vals[pos+1]))
-                           continue;
-                         if (run > bestRun)
-                         {
-                           best = vals[pos];
-                           bestPos = order[pos];
-                           bestRun = run;
-                         }
-                         run = 0;
-                       }
-                       *values__data = best;
-                       *indices__data = bestPos;);
-
-  THTensor_(free)(scratch);
-  THLongTensor_free(scratchIdx);
-  if (!keepdim) {
-    THTensor_(squeeze1d)(values_, values_, dimension);
-    THLongTensor_squeeze1d(indices_, indices_, dimension);
-  }
-}
-
-/* k-th smallest value (k is 1-based) of t along `dimension`.
- *
- * values_ and indices_ are resized to t's sizes with `dimension` set to 1 and
- * receive the selected value and its 0-based position in the original slice.
- * Each slice is copied into a scratch buffer and partially ordered with
- * THTensor_(quickselect). The reduced dimension is squeezed out when keepdim
- * is 0. k must satisfy 0 < k <= size of `dimension`. */
-void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim)
-{
-  THLongStorage *outSize;
-  THTensor *scratch;
-  THLongTensor *scratchIdx;
-  real *vals;
-  long *order;
-  long sliceLen;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range");
-  THArgCheck(k > 0 && k <= t->size[dimension], 2, "selected index out of range");
-
-  outSize = THTensor_(newSizeOf)(t);
-  THLongStorage_set(outSize, dimension, 1);
-  THTensor_(resize)(values_, outSize, NULL);
-  THLongTensor_resize(indices_, outSize, NULL);
-  THLongStorage_free(outSize);
-
-  sliceLen = THTensor_(size)(t, dimension);
-
-  /* contiguous scratch buffers reused for every slice */
-  scratch = THTensor_(new)();
-  THTensor_(resize1d)(scratch, sliceLen);
-  vals = THTensor_(data)(scratch);
-
-  scratchIdx = THLongTensor_new();
-  THLongTensor_resize1d(scratchIdx, sliceLen);
-  order = THLongTensor_data(scratchIdx);
-
-  TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,
-                       long pos;
-                       for(pos = 0; pos < sliceLen; pos++)
-                       {
-                         vals[pos] = t_data[pos*t_stride];
-                         order[pos] = pos;
-                       }
-                       THTensor_(quickselect)(vals, order, k - 1, sliceLen, 1);
-                       *values__data = vals[k-1];
-                       *indices__data = order[k-1];);
-
-  THTensor_(free)(scratch);
-  THLongTensor_free(scratchIdx);
-  if (!keepdim) {
-    THTensor_(squeeze1d)(values_, values_, dimension);
-    THLongTensor_squeeze1d(indices_, indices_, dimension);
-  }
-}
-
-/* Median of t along `dimension`, delegated to THTensor_(kthvalue) with the
- * middle rank; for an even number of elements the lower of the two middle
- * elements is selected. */
-void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)
-{
-  long sliceLen;
-  long middle;
-
-  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range");
-
-  sliceLen = THTensor_(size)(t, dimension);
-  middle = (sliceLen-1) >> 1; /* 0-based rank of the middle / one-before-middle element */
-
-  /* kthvalue expects a 1-based rank */
-  THTensor_(kthvalue)(values_, indices_, t, middle+1, dimension, keepdim);
-}
-
-/* Selects k elements of t along `dim`.
- *
- * rt_ and ri_ are resized to t's sizes with `dim` set to k and receive the
- * selected values and their 0-based positions in the original slice.
- * A non-zero `dir` picks the k largest elements (descending when `sorted`),
- * otherwise the k smallest (ascending when `sorted`). With `sorted` == 0 the
- * selected elements are returned in whatever order the selection left them. */
-void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted)
-{
-  int numDims = THTensor_(nDimension)(t);
-  THArgCheck(dim >= 0 && dim < numDims, 3, "dim not in range");
-
-  long sliceSize = THTensor_(size)(t, dim);
-  THArgCheck(k > 0 && k <= sliceSize, 2, "k not in range for dimension");
-
-  /* contiguous scratch buffers reused for every slice */
-  THTensor *tmpResults = THTensor_(new)();
-  THTensor_(resize1d)(tmpResults, sliceSize);
-  real *scratch = THTensor_(data)(tmpResults);
-
-  THLongTensor *tmpIndices = THLongTensor_new();
-  THLongTensor_resize1d(tmpIndices, sliceSize);
-  long *scratchIdx = THLongTensor_data(tmpIndices);
-
-  THLongStorage *topKSize = THTensor_(newSizeOf)(t);
-  THLongStorage_set(topKSize, dim, k);
-  THTensor_(resize)(rt_, topKSize, NULL);
-  THLongTensor_resize(ri_, topKSize, NULL);
-  THLongStorage_free(topKSize);
-
-  /* offset of the first selected element inside the scratch buffers:
-     the largest k sit at the end, the smallest k at the start */
-  long first = dir ? sliceSize - k : 0;
-
-  TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim,
-                       long pos;
-                       for(pos = 0; pos < sliceSize; pos++)
-                       {
-                         scratch[pos] = t_data[pos*t_stride];
-                         scratchIdx[pos] = pos;
-                       }
-                       if (dir)
-                       {
-                         /* k largest elements, descending order (optional: see sorted) */
-                         if (first > 0)
-                           THTensor_(quickselect)(scratch, scratchIdx, first - 1, sliceSize, 1);
-                         if (sorted)
-                           THTensor_(quicksortdescend)(scratch + first, scratchIdx + first, k, 1);
-                       }
-                       else
-                       {
-                         /* k smallest elements, ascending order (optional: see sorted) */
-                         THTensor_(quickselect)(scratch, scratchIdx, k - 1, sliceSize, 1);
-                         if (sorted)
-                           THTensor_(quicksortascend)(scratch, scratchIdx, k - 1, 1);
-                       }
-                       for(pos = 0; pos < k; pos++)
-                       {
-                         rt__data[pos*rt__stride] = scratch[pos + first];
-                         ri__data[pos*ri__stride] = scratchIdx[pos + first];
-                       })
-
-  THTensor_(free)(tmpResults);
-  THLongTensor_free(tmpIndices);
-}
-
-/* Lower-triangular part of the matrix t written into r_ (resized like t):
- * in row r, columns up to and including r + k are copied from t and every
- * later column is set to 0. t must be 2D. */
-void THTensor_(tril)(THTensor *r_, THTensor *t, long k)
-{
-  long rows, cols;
-  long srcStride0, srcStride1;
-  long dstStride0, dstStride1;
-  real *src, *dst;
-  long r, c;
-
-  THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix");
-
-  THTensor_(resizeAs)(r_, t);
-
-  rows = THTensor_(size)(t, 0);
-  cols = THTensor_(size)(t, 1);
-  srcStride0 = THTensor_(stride)(t, 0);
-  srcStride1 = THTensor_(stride)(t, 1);
-  dstStride0 = THTensor_(stride)(r_, 0);
-  dstStride1 = THTensor_(stride)(r_, 1);
-  dst = THTensor_(data)(r_);
-  src = THTensor_(data)(t);
-
-  for(r = 0; r < rows; r++)
-  {
-    long keep = THMin(r+k+1, cols); /* number of leading columns copied from t */
-    long srcRow = r*srcStride0;
-    long dstRow = r*dstStride0;
-
-    /* clear everything right of the kept part first, then copy the kept part */
-    for(c = THMax(0, r+k+1); c < cols; c++)
-      dst[dstRow+c*dstStride1] = 0;
-    for(c = 0; c < keep; c++)
-      dst[dstRow+c*dstStride1] = src[srcRow+c*srcStride1];
-  }
-}
-
-/* Upper-triangular part of the matrix t written into r_ (resized like t):
- * in row r, columns from r + k onwards are copied from t and every earlier
- * column is set to 0. t must be 2D. */
-void THTensor_(triu)(THTensor *r_, THTensor *t, long k)
-{
-  long rows, cols;
-  long srcStride0, srcStride1;
-  long dstStride0, dstStride1;
-  real *src, *dst;
-  long r, c;
-
-  THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix");
-
-  THTensor_(resizeAs)(r_, t);
-
-  rows = THTensor_(size)(t, 0);
-  cols = THTensor_(size)(t, 1);
-  srcStride0 = THTensor_(stride)(t, 0);
-  srcStride1 = THTensor_(stride)(t, 1);
-  dstStride0 = THTensor_(stride)(r_, 0);
-  dstStride1 = THTensor_(stride)(r_, 1);
-  dst = THTensor_(data)(r_);
-  src = THTensor_(data)(t);
-
-  for(r = 0; r < rows; r++)
-  {
-    long cleared = THMin(r+k, cols); /* number of leading columns set to 0 */
-    long srcRow = r*srcStride0;
-    long dstRow = r*dstStride0;
-
-    /* copy the kept part first, then clear everything left of it */
-    for(c = THMax(0, r+k); c < cols; c++)
-      dst[dstRow+c*dstStride1] = src[srcRow+c*srcStride1];
-    for(c = 0; c < cleared; c++)
-      dst[dstRow+c*dstStride1] = 0;
-  }
-}
-
-void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension)
-{
- THTensor* inputs[2];
- inputs[0] = ta;
- inputs[1] = tb;
- THTensor_(catArray)(r_, inputs, 2, dimension);
-}
-
-void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension)
-{
- THLongStorage *size;
- int i, j;
- long offset;
- int maxDim = dimension + 1;
- int allEmpty = 1;
- int allContiguous = 1;
-
- // cat_dimension is the actual dimension we cat along
- int cat_dimension = dimension;
-
- for (i = 0; i < numInputs; i++)
- {
- maxDim = THMax(maxDim, inputs[i]->nDimension);
- }
-
- // When the user input dimension is -1 (i.e. -2 in C)
- // Then we pick the maximum last dimension across all tensors.
- if ( dimension + TH_INDEX_BASE == -1 )
- {
- cat_dimension = maxDim?(maxDim-1):0;
- }
-
- THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs);
- THArgCheck(cat_dimension >= 0, 4, "invalid dimension %d", dimension + TH_INDEX_BASE);
-
- size = THLongStorage_newWithSize(maxDim);
-
- for(i = 0; i < maxDim; i++)
- {
- // dimSize is either the size of the dim if it exists, either 1 if #dim > 0, otherwise 0
- long dimSize = i < inputs[0]->nDimension ? inputs[0]->size[i] : THMin(inputs[0]->nDimension, 1);
- if (i == cat_dimension)
- {
- for (j = 1; j < numInputs; j++)
- {
- // accumulate the size over the dimension we want to cat on.
- // Empty tensors are allowed
- dimSize += i < inputs[j]->nDimension ? inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1);
- }
- }
- else
- {
- for (j = 1; j < numInputs; j++)
- {
- long sz = (i < inputs[j]->nDimension ? inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1));
- // If it's a dimension we're not catting on
- // Then fail if sizes are different AND > 0
- if (dimSize != sz && dimSize && sz)
- {
- THLongStorage_free(size);
- THError("inconsistent tensor sizes");
- }
- else if(!dimSize)
- {
- dimSize = sz;
- }
- }
- }
- allEmpty = allEmpty && !dimSize;
- size->data[i] = dimSize;
- }
-
- // Initiate catting and resizing
- // If at least one of the input is not empty
- if (!allEmpty)
- {
- THTensor_(resize)(result, size, NULL);
-
- // Check contiguity of all inputs and result
- for (i = 0; i < numInputs; i++) {
- if(inputs[i]->nDimension) {
- allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]);
- }
- }
- allContiguous = allContiguous && THTensor_(isContiguous)(result);
-
- // First path is for contiguous inputs along dim 1
- // Second path for non-contiguous
- if (cat_dimension == 0 && allContiguous)
- {
- real* result_data = result->storage->data + result->storageOffset;
- offset = 0;
- for (j = 0; j < numInputs; j++)
- {
- if (inputs[j]->nDimension)
- {
- THTensor* input0 = inputs[j];
- real* input0_data = input0->storage->data + input0->storageOffset;
- long input0_size = THTensor_(nElement)(input0);
- memcpy(result_data + offset, input0_data, input0_size*sizeof(real));
- offset += input0_size;
- }
- }
- }
- else
- {
- offset = 0;
- for (j = 0; j < numInputs; j++)
- {
- if (inputs[j]->nDimension)
- {
- long dimSize = cat_dimension < inputs[j]->nDimension ? inputs[j]->size[cat_dimension] : 1;
- THTensor *nt = THTensor_(newWithTensor)(result);
- THTensor_(narrow)(nt, NULL, cat_dimension, offset, dimSize);
- THTensor_(copy)(nt, inputs[j]);
- THTensor_(free)(nt);
- offset += dimSize;
- }
- }
- }
- }
- THLongStorage_free(size);
-}
-
-int THTensor_(equal)(THTensor *ta, THTensor* tb)
-{
- int equal = 1;
- if(!THTensor_(isSameSizeAs)(ta, tb))
- return 0;
-
- if (THTensor_(isContiguous)(ta) && THTensor_(isContiguous)(tb)) {
- real *tap = THTensor_(data)(ta);
- real *tbp = THTensor_(data)(tb);
- ptrdiff_t sz = THTensor_(nElement)(ta);
- ptrdiff_t i;
- for (i=0; i<sz; ++i){
- if(tap[i] != tbp[i]) return 0;
- }
- } else {
- // Short-circuit the apply function on inequality
- TH_TENSOR_APPLY2(real, ta, real, tb,
- if (equal && *ta_data != *tb_data) {
- equal = 0;
- TH_TENSOR_APPLY_hasFinished = 1; break;
- })
- }
- return equal;
-}
-
-#define TENSOR_IMPLEMENT_LOGICAL(NAME,OP) \
- void THTensor_(NAME##Value)(THByteTensor *r_, THTensor* t, real value) \
- { \
- THByteTensor_resizeNd(r_, t->nDimension, t->size, NULL); \
- TH_TENSOR_APPLY2(unsigned char, r_, real, t, \
- *r__data = (*t_data OP value) ? 1 : 0;); \
- } \
- void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, real value) \
- { \
- THTensor_(resizeNd)(r_, t->nDimension, t->size, NULL); \
- TH_TENSOR_APPLY2(real, r_, real, t, \
- *r__data = (*t_data OP value) ? 1 : 0;); \
- } \
- void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \
- { \
- THByteTensor_resizeNd(r_, ta->nDimension, ta->size, NULL); \
- TH_TENSOR_APPLY3(unsigned char, r_, real, ta, real, tb, \
- *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \
- } \
- void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \
- { \
- THTensor_(resizeNd)(r_, ta->nDimension, ta->size, NULL); \
- TH_TENSOR_APPLY3(real, r_, real, ta, real, tb, \
- *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \
- } \
-
-
-TENSOR_IMPLEMENT_LOGICAL(lt,<)
-TENSOR_IMPLEMENT_LOGICAL(gt,>)
-TENSOR_IMPLEMENT_LOGICAL(le,<=)
-TENSOR_IMPLEMENT_LOGICAL(ge,>=)
-TENSOR_IMPLEMENT_LOGICAL(eq,==)
-TENSOR_IMPLEMENT_LOGICAL(ne,!=)
-
-#define LAB_IMPLEMENT_BASIC_FUNCTION(NAME, CFUNC) \
- void THTensor_(NAME)(THTensor *r_, THTensor *t) \
- { \
- THTensor_(resizeAs)(r_, t); \
- TH_TENSOR_APPLY2(real, t, real, r_, *r__data = CFUNC(*t_data);); \
- } \
-
-#define LAB_IMPLEMENT_BASIC_FUNCTION_VALUE(NAME, CFUNC) \
- void THTensor_(NAME)(THTensor *r_, THTensor *t, real value) \
- { \
- THTensor_(resizeAs)(r_, t); \
- TH_TENSOR_APPLY2(real, t, real, r_, *r__data = CFUNC(*t_data, value);); \
- } \
-
-#if defined(TH_REAL_IS_LONG)
-LAB_IMPLEMENT_BASIC_FUNCTION(abs,labs)
-#endif /* long only part */
-
-#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT)
-LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)
-#endif /* int only part */
-
-#if defined(TH_REAL_IS_BYTE)
-
-#define TENSOR_IMPLEMENT_LOGICAL_SUM(NAME, OP, INIT_VALUE) \
- int THTensor_(NAME)(THTensor *tensor) \
- { \
- THArgCheck(tensor->nDimension > 0, 1, "empty Tensor"); \
- int sum = INIT_VALUE; \
- TH_TENSOR_APPLY(real, tensor, sum = sum OP *tensor_data;); \
- return sum; \
- }
-
-TENSOR_IMPLEMENT_LOGICAL_SUM(logicalall, &&, 1)
-TENSOR_IMPLEMENT_LOGICAL_SUM(logicalany, ||, 0)
-
-#endif /* Byte only part */
-
-/* floating point only now */
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-
-#if defined (TH_REAL_IS_FLOAT)
-#define TH_MATH_NAME(fn) fn##f
-#else
-#define TH_MATH_NAME(fn) fn
-#endif
-
-LAB_IMPLEMENT_BASIC_FUNCTION(log,TH_MATH_NAME(log))
-LAB_IMPLEMENT_BASIC_FUNCTION(lgamma,TH_MATH_NAME(lgamma))
-LAB_IMPLEMENT_BASIC_FUNCTION(log1p,TH_MATH_NAME(log1p))
-LAB_IMPLEMENT_BASIC_FUNCTION(sigmoid,TH_MATH_NAME(TH_sigmoid))
-LAB_IMPLEMENT_BASIC_FUNCTION(exp,TH_MATH_NAME(exp))
-LAB_IMPLEMENT_BASIC_FUNCTION(cos,TH_MATH_NAME(cos))
-LAB_IMPLEMENT_BASIC_FUNCTION(acos,TH_MATH_NAME(acos))
-LAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh))
-LAB_IMPLEMENT_BASIC_FUNCTION(sin,TH_MATH_NAME(sin))
-LAB_IMPLEMENT_BASIC_FUNCTION(asin,TH_MATH_NAME(asin))
-LAB_IMPLEMENT_BASIC_FUNCTION(sinh,TH_MATH_NAME(sinh))
-LAB_IMPLEMENT_BASIC_FUNCTION(tan,TH_MATH_NAME(tan))
-LAB_IMPLEMENT_BASIC_FUNCTION(atan,TH_MATH_NAME(atan))
-LAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh))
-LAB_IMPLEMENT_BASIC_FUNCTION_VALUE(pow,TH_MATH_NAME(pow))
-LAB_IMPLEMENT_BASIC_FUNCTION(sqrt,TH_MATH_NAME(sqrt))
-LAB_IMPLEMENT_BASIC_FUNCTION(rsqrt,TH_MATH_NAME(TH_rsqrt))
-LAB_IMPLEMENT_BASIC_FUNCTION(ceil,TH_MATH_NAME(ceil))
-LAB_IMPLEMENT_BASIC_FUNCTION(floor,TH_MATH_NAME(floor))
-LAB_IMPLEMENT_BASIC_FUNCTION(round,TH_MATH_NAME(round))
-LAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs))
-LAB_IMPLEMENT_BASIC_FUNCTION(trunc,TH_MATH_NAME(trunc))
-LAB_IMPLEMENT_BASIC_FUNCTION(frac,TH_MATH_NAME(TH_frac))
-LAB_IMPLEMENT_BASIC_FUNCTION(neg,-)
-LAB_IMPLEMENT_BASIC_FUNCTION(cinv, TH_MATH_NAME(1.0) / )
-
-
-void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty)
-{
- THTensor_(resizeAs)(r_, tx);
- TH_TENSOR_APPLY3(real, r_, real, tx, real, ty, *r__data = TH_MATH_NAME(atan2)(*tx_data,*ty_data););
-}
-
-void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight)
-{
- THArgCheck(THTensor_(nElement)(a) == THTensor_(nElement)(b), 2, "sizes do not match");
- THTensor_(resizeAs)(r_, a);
- TH_TENSOR_APPLY3(real, r_, real, a, real, b, *r__data = TH_MATH_NAME(TH_lerp)(*a_data, *b_data, weight););
-}
-
-void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim)
-{
- THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "invalid dimension %d",
- dimension + TH_INDEX_BASE);
-
- THTensor_(sum)(r_, t, dimension, keepdim);
- THTensor_(div)(r_, r_, t->size[dimension]);
-}
-
-void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim)
-{
- THLongStorage *dim;
-
- THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d",
- dimension + TH_INDEX_BASE);
-
- dim = THTensor_(newSizeOf)(t);
- THLongStorage_set(dim, dimension, 1);
- THTensor_(resize)(r_, dim, NULL);
- THLongStorage_free(dim);
-
- TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
- accreal sum = 0;
- accreal sum2 = 0;
- long i;
- for(i = 0; i < t_size; i++)
- {
- real z = t_data[i*t_stride];
- sum += z;
- sum2 += z*z;
- }
-
- if(flag)
- {
- sum /= t_size;
- sum2 /= t_size;
- sum2 -= sum*sum;
- sum2 = (sum2 < 0 ? 0 : sum2);
- *r__data = (real)TH_MATH_NAME(sqrt)(sum2);
- }
- else
- {
- sum /= t_size;
- sum2 /= t_size-1;
- sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum;
- sum2 = (sum2 < 0 ? 0 : sum2);
- *r__data = (real)TH_MATH_NAME(sqrt)(sum2);
- });
-
- if (!keepdim) {
- THTensor_(squeeze1d)(r_, r_, dimension);
- }
-}
-
-void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim)
-{
- THLongStorage *dim;
-
- THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d",
- dimension + TH_INDEX_BASE);
-
- dim = THTensor_(newSizeOf)(t);
- THLongStorage_set(dim, dimension, 1);
- THTensor_(resize)(r_, dim, NULL);
- THLongStorage_free(dim);
-
- TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
- accreal sum = 0;
- accreal sum2 = 0;
- long i;
- for(i = 0; i < t_size; i++)
- {
- real z = t_data[i*t_stride];
- sum += z;
- sum2 += z*z;
- }
-
- if(flag)
- {
- sum /= t_size;
- sum2 /= t_size;
- sum2 -= sum*sum;
- sum2 = (sum2 < 0 ? 0 : sum2);
- *r__data = sum2;
- }
- else
- {
- sum /= t_size;
- sum2 /= t_size-1;
- sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum;
- sum2 = (sum2 < 0 ? 0 : sum2);
- *r__data = (real)sum2;
- });
-
- if (!keepdim) {
- THTensor_(squeeze1d)(r_, r_, dimension);
- }
-}
-
-void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim)
-{
- THLongStorage *dim;
-
- THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d",
- dimension + TH_INDEX_BASE);
-
- dim = THTensor_(newSizeOf)(t);
- THLongStorage_set(dim, dimension, 1);
- THTensor_(resize)(r_, dim, NULL);
- THLongStorage_free(dim);
-
- if(value == 0) {
- TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
- accreal sum = 0;
- long i;
- for(i = 0; i < t_size; i++)
- sum += t_data[i*t_stride] != 0.0;
- *r__data = sum;)
- } else {
- TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
- accreal sum = 0;
- long i;
- for(i = 0; i < t_size; i++) {
- sum += TH_MATH_NAME(pow)(
- TH_MATH_NAME(fabs)(t_data[i*t_stride]), value);
- }
- *r__data = TH_MATH_NAME(pow)(sum, 1.0/value);)
- }
-
- if (!keepdim) {
- THTensor_(squeeze1d)(r_, r_, dimension);
- }
-}
-
-accreal THTensor_(normall)(THTensor *tensor, real value)
-{
- accreal sum = 0;
- if(value == 0) {
- TH_TENSOR_APPLY(real, tensor, sum += *tensor_data != 0.0;);
- return sum;
- } else if(value == 1) {
- TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(fabs)(*tensor_data););
- return sum;
- } else if(value == 2) {
- TH_TENSOR_APPLY(real, tensor, accreal z = *tensor_data; sum += z*z;);
- return sqrt(sum);
- } else {
- TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value););
- return TH_MATH_NAME(pow)(sum, 1.0/value);
- }
-}
-
-void THTensor_(renorm)(THTensor *res, THTensor *src, real value, int dimension, real maxnorm)
-{
- int i;
- THTensor *rowR, *rowS;
-
- THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(src), 3, "invalid dimension %d",
- dimension + TH_INDEX_BASE);
- THArgCheck(value > 0, 2, "non-positive-norm not supported");
- THArgCheck(THTensor_(nDimension)(src) > 1, 1, "need at least 2 dimensions, got %d dimensions",
- THTensor_(nDimension)(src));
-
- rowR = THTensor_(new)();
- rowS = THTensor_(new)();
-
- THTensor_(resizeAs)(res, src);
-
- for (i=0; i<src->size[dimension]; i++)
- {
- real norm = 0;
- real new_norm;
-
- THTensor_(select)(rowS, src, dimension, i);
- THTensor_(select)(rowR, res, dimension, i);
- if (value == 1) {
- TH_TENSOR_APPLY(real, rowS, norm += fabs(*rowS_data););
- } else if (value == 2) {
- TH_TENSOR_APPLY(real, rowS, accreal z = *rowS_data; norm += z*z;);
- } else {
- TH_TENSOR_APPLY(real, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value););
- }
-
- norm = pow(norm, 1/value);
-
- if (norm > maxnorm)
- {
- new_norm = maxnorm / (norm + 1e-7);
-
- TH_TENSOR_APPLY2(
- real, rowR, real, rowS,
- *rowR_data = (*rowS_data) * new_norm;
- )
- }
- else
- THTensor_(copy)(rowR, rowS);
- }
-
- THTensor_(free)(rowR);
- THTensor_(free)(rowS);
-}
-
-accreal THTensor_(dist)(THTensor *tensor, THTensor *src, real value)
-{
- real sum = 0;
- TH_TENSOR_APPLY2(real, tensor, real, src,
- sum += TH_MATH_NAME(pow)(
- TH_MATH_NAME(fabs)(*tensor_data - *src_data), value););
- return TH_MATH_NAME(pow)(sum, 1.0/value);
-}
-
-accreal THTensor_(meanall)(THTensor *tensor)
-{
- THArgCheck(tensor->nDimension > 0, 1, "empty Tensor");
- return THTensor_(sumall)(tensor)/THTensor_(nElement)(tensor);
-}
-
-accreal THTensor_(varall)(THTensor *tensor)
-{
- accreal mean = THTensor_(meanall)(tensor);
- accreal sum = 0;
- TH_TENSOR_APPLY(real, tensor, sum += (*tensor_data - mean)*(*tensor_data - mean););
- sum /= (THTensor_(nElement)(tensor)-1);
- return sum;
-}
-
-accreal THTensor_(stdall)(THTensor *tensor)
-{
- return sqrt(THTensor_(varall)(tensor));
-}
-
-void THTensor_(linspace)(THTensor *r_, real a, real b, long n)
-{
- real i = 0;
-
- THArgCheck(n > 1 || (n == 1 && (a == b)), 3, "invalid number of points");
-
- if (THTensor_(nElement)(r_) != n) {
- THTensor_(resize1d)(r_, n);
- }
-
- if(n == 1) {
- TH_TENSOR_APPLY(real, r_,
- *r__data = a;
- i++;
- );
- } else {
- TH_TENSOR_APPLY(real, r_,
- *r__data = a + i*(b-a)/((real)(n-1));
- i++;
- );
- }
-}
-
-void THTensor_(logspace)(THTensor *r_, real a, real b, long n)
-{
- real i = 0;
-
- THArgCheck(n > 1 || (n == 1 && (a == b)), 3, "invalid number of points");
-
- if (THTensor_(nElement)(r_) != n) {
- THTensor_(resize1d)(r_, n);
- }
-
- if(n == 1) {
- TH_TENSOR_APPLY(real, r_,
- *r__data = TH_MATH_NAME(pow)(10.0, a);
- i++;
- );
- } else {
- TH_TENSOR_APPLY(real, r_,
- *r__data = TH_MATH_NAME(pow)(10.0, a + i*(b-a)/((real)(n-1)));
- i++;
- );
- }
-}
-
-void THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size)
-{
- THTensor_(resize)(r_, size, NULL);
- THTensor_(uniform)(r_, _generator, 0, 1);
-}
-
-void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size)
-{
- THTensor_(resize)(r_, size, NULL);
- THTensor_(normal)(r_, _generator, 0, 1);
-}
-
-void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue)
-{
- real minval;
- real maxval;
- real *h_data;
-
- THTensor_(resize1d)(hist, nbins);
- THTensor_(zero)(hist);
- minval = minvalue;
- maxval = maxvalue;
- if (minval == maxval)
- {
- minval = THTensor_(minall)(tensor);
- maxval = THTensor_(maxall)(tensor);
- }
- if (minval == maxval)
- {
- minval = minval - 1;
- maxval = maxval + 1;
- }
-
- h_data = THTensor_(data)(hist);
-
- TH_TENSOR_APPLY(real, tensor,
- if (*tensor_data >= minval && *tensor_data <= maxval) {
- const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins);
- h_data[THMin(bin, nbins-1)] += 1;
- }
- );
-}
-
-void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue)
-{
- THArgCheck(THTensor_(nDimension)(tensor) < 3, 2, "invalid dimension %d, the input must be a 2d tensor", THTensor_(nDimension)(tensor));
-
- int dimension = 1;
- THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(tensor), 2, "invalid dimension %d",
- dimension + TH_INDEX_BASE);
-
- real minval;
- real maxval;
- real *h_data;
-
- THTensor_(resize2d)(hist, tensor->size[0], nbins);
- THTensor_(zero)(hist);
-
- minval = minvalue;
- maxval = maxvalue;
- if (minval == maxval)
- {
- minval = THTensor_(minall)(tensor);
- maxval = THTensor_(maxall)(tensor);
- }
- if (minval == maxval)
- {
- minval = minval - 1;
- maxval = maxval + 1;
- }
-
- TH_TENSOR_DIM_APPLY2(real, tensor, real, hist, dimension, long i;
- for(i = 0; i < tensor_size; i++)
- {
- if(tensor_data[i*tensor_stride] >= minval && tensor_data[i*tensor_stride] <= maxval) {
- const int bin = (int)((tensor_data[i*tensor_stride]-minval) / (maxval-minval) * nbins);
- hist_data[THMin(bin, nbins-1)] += 1;
- }
- }
- );
-}
-
-#undef TH_MATH_NAME
-#endif /* floating point only part */
-#undef IS_NONZERO
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h
deleted file mode 100644
index 17e54ccf6..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h
+++ /dev/null
@@ -1,198 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorMath.h"
-#else
-
-TH_API void THTensor_(fill)(THTensor *r_, real value);
-TH_API void THTensor_(zero)(THTensor *r_);
-
-TH_API void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value);
-TH_API void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src);
-TH_API void THTensor_(maskedSelect)(THTensor *tensor, THTensor* src, THByteTensor *mask);
-
-TH_API void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor);
-
-TH_API void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index);
-TH_API void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);
-TH_API void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);
-TH_API void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val);
-
-TH_API void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index);
-TH_API void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);
-TH_API void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);
-TH_API void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val);
-
-TH_API accreal THTensor_(dot)(THTensor *t, THTensor *src);
-
-TH_API real THTensor_(minall)(THTensor *t);
-TH_API real THTensor_(maxall)(THTensor *t);
-TH_API real THTensor_(medianall)(THTensor *t);
-TH_API accreal THTensor_(sumall)(THTensor *t);
-TH_API accreal THTensor_(prodall)(THTensor *t);
-
-TH_API void THTensor_(neg)(THTensor *self, THTensor *src);
-TH_API void THTensor_(cinv)(THTensor *self, THTensor *src);
-
-TH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(sub)(THTensor *self, THTensor *src, real value);
-TH_API void THTensor_(mul)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(div)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(lshift)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(rshift)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(fmod)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(remainder)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value);
-TH_API void THTensor_(bitand)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(bitor)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(bitxor)(THTensor *r_, THTensor *t, real value);
-
-TH_API void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src);
-TH_API void THTensor_(csub)(THTensor *self, THTensor *src1, real value, THTensor *src2);
-TH_API void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src);
-TH_API void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src);
-
-TH_API void THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2);
-TH_API void THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2);
-
-TH_API void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat, THTensor *vec);
-TH_API void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat1, THTensor *mat2);
-TH_API void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2);
-
-TH_API void THTensor_(addbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2);
-TH_API void THTensor_(baddbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2);
-
-TH_API void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain);
-
-TH_API ptrdiff_t THTensor_(numel)(THTensor *t);
-TH_API void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim);
-TH_API void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension);
-TH_API void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension);
-TH_API void THTensor_(sign)(THTensor *r_, THTensor *t);
-TH_API accreal THTensor_(trace)(THTensor *t);
-TH_API void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension);
-
-TH_API void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src);
-TH_API void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src);
-TH_API void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value);
-TH_API void THTensor_(cminValue)(THTensor *r, THTensor *t, real value);
-
-TH_API void THTensor_(zeros)(THTensor *r_, THLongStorage *size);
-TH_API void THTensor_(ones)(THTensor *r_, THLongStorage *size);
-TH_API void THTensor_(diag)(THTensor *r_, THTensor *t, int k);
-TH_API void THTensor_(eye)(THTensor *r_, long n, long m);
-TH_API void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step);
-TH_API void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step);
-TH_API void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n);
-
-TH_API void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size);
-TH_API void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder);
-TH_API void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted);
-TH_API void THTensor_(tril)(THTensor *r_, THTensor *t, long k);
-TH_API void THTensor_(triu)(THTensor *r_, THTensor *t, long k);
-TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension);
-TH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension);
-
-TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb);
-
-TH_API void THTensor_(ltValue)(THByteTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(leValue)(THByteTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(gtValue)(THByteTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(geValue)(THByteTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(neValue)(THByteTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(eqValue)(THByteTensor *r_, THTensor* t, real value);
-
-TH_API void THTensor_(ltValueT)(THTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(leValueT)(THTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(gtValueT)(THTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(geValueT)(THTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(neValueT)(THTensor *r_, THTensor* t, real value);
-TH_API void THTensor_(eqValueT)(THTensor *r_, THTensor* t, real value);
-
-TH_API void THTensor_(ltTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(leTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(gtTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(geTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(neTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(eqTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);
-
-TH_API void THTensor_(ltTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(leTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(gtTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(geTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(neTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);
-TH_API void THTensor_(eqTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);
-
-#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG)
-TH_API void THTensor_(abs)(THTensor *r_, THTensor *t);
-#endif
-
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-
-TH_API void THTensor_(sigmoid)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(log)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(lgamma)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(log1p)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(exp)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(cos)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(acos)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(cosh)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(sin)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(asin)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(sinh)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(tan)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(atan)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty);
-TH_API void THTensor_(tanh)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(pow)(THTensor *r_, THTensor *t, real value);
-TH_API void THTensor_(tpow)(THTensor *r_, real value, THTensor *t);
-TH_API void THTensor_(sqrt)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(rsqrt)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(ceil)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(floor)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(round)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(abs)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(trunc)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(frac)(THTensor *r_, THTensor *t);
-TH_API void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight);
-
-TH_API void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim);
-TH_API void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim);
-TH_API void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim);
-TH_API void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim);
-TH_API void THTensor_(renorm)(THTensor *r_, THTensor *t, real value, int dimension, real maxnorm);
-TH_API accreal THTensor_(dist)(THTensor *a, THTensor *b, real value);
-TH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue);
-TH_API void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue);
-
-TH_API accreal THTensor_(meanall)(THTensor *self);
-TH_API accreal THTensor_(varall)(THTensor *self);
-TH_API accreal THTensor_(stdall)(THTensor *self);
-TH_API accreal THTensor_(normall)(THTensor *t, real value);
-
-TH_API void THTensor_(linspace)(THTensor *r_, real a, real b, long n);
-TH_API void THTensor_(logspace)(THTensor *r_, real a, real b, long n);
-TH_API void THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size);
-TH_API void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size);
-#endif
-
-#if defined(TH_REAL_IS_BYTE)
-
-TH_API int THTensor_(logicalall)(THTensor *self);
-TH_API int THTensor_(logicalany)(THTensor *self);
-
-#endif /* TH_REAL_IS_BYTE */
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c
deleted file mode 100644
index 514d3dd27..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c
+++ /dev/null
@@ -1,250 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorRandom.c"
-#else
-
-void THTensor_(random)(THTensor *self, THGenerator *_generator)
-{
-#if defined(TH_REAL_IS_BYTE)
- TH_TENSOR_APPLY(real, self, *self_data = (unsigned char)(THRandom_random(_generator) % (UCHAR_MAX+1)););
-#elif defined(TH_REAL_IS_CHAR)
- TH_TENSOR_APPLY(real, self, *self_data = (char)(THRandom_random(_generator) % (CHAR_MAX+1)););
-#elif defined(TH_REAL_IS_SHORT)
- TH_TENSOR_APPLY(real, self, *self_data = (short)(THRandom_random(_generator) % (SHRT_MAX+1)););
-#elif defined(TH_REAL_IS_INT)
- TH_TENSOR_APPLY(real, self, *self_data = (int)(THRandom_random(_generator) % (INT_MAX+1UL)););
-#elif defined(TH_REAL_IS_LONG)
- TH_TENSOR_APPLY(real, self, *self_data = (long)(THRandom_random(_generator) % (LONG_MAX+1UL)););
-#elif defined(TH_REAL_IS_FLOAT)
- TH_TENSOR_APPLY(real, self, *self_data = (float)(THRandom_random(_generator) % ((1UL << FLT_MANT_DIG)+1)););
-#elif defined(TH_REAL_IS_DOUBLE)
- TH_TENSOR_APPLY(real, self, *self_data = (double)(THRandom_random(_generator) % ((1ULL << DBL_MANT_DIG)+1)););
-#else
-#error "Unknown type"
-#endif
-}
-
-void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_geometric(_generator, p););
-}
-
-void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_bernoulli(_generator, p););
-}
-
-void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p)
-{
- TH_TENSOR_APPLY2(real, self, float, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data););
-}
-
-void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p)
-{
- TH_TENSOR_APPLY2(real, self, double, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data););
-}
-
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-
-void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_uniform(_generator, a, b););
-}
-
-void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_normal(_generator, mean, stdv););
-}
-
-void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_exponential(_generator, lambda););
-}
-
-void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_cauchy(_generator, median, sigma););
-}
-
-void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv)
-{
- TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_logNormal(_generator, mean, stdv););
-}
-
-void THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement)
-{
- int start_dim = THTensor_(nDimension)(prob_dist);
- long n_dist;
- long n_categories;
- THDoubleTensor* cum_dist;
- int i,j,k;
-
- if (start_dim == 1)
- {
- THTensor_(resize2d)(prob_dist, 1, THTensor_(size)(prob_dist, 0));
- }
-
- n_dist = THTensor_(size)(prob_dist, 0);
- n_categories = THTensor_(size)(prob_dist, 1);
-
- THArgCheck(n_sample > 0, 2, "cannot sample n_sample < 0 samples");
-
- if (!with_replacement)
- {
- THArgCheck((!with_replacement) && (n_sample <= n_categories), 2, \
- "cannot sample n_sample > prob_dist:size(1) samples without replacement");
- }
-
- /* cumulative probability distribution vector */
- cum_dist = THDoubleTensor_newWithSize1d(n_categories);
-
- /* will contain multinomial samples (category indices to be returned) */
- THLongTensor_resize2d(self, n_dist , n_sample);
-
- for (i=0; i<n_dist; i++)
- {
- /* Get normalized cumulative distribution from prob distribution */
- double sum = 0;
- for (j=0; j<n_categories; j++)
- {
- sum += THStorage_(get)( \
- prob_dist->storage, \
- prob_dist->storageOffset+i*prob_dist->stride[0]+j*prob_dist->stride[1] \
- );
- THDoubleStorage_set(
- cum_dist->storage, \
- cum_dist->storageOffset+j*cum_dist->stride[0], \
- sum \
- );
- }
- THArgCheckWithCleanup((sum > 0), THCleanup(THDoubleTensor_free(cum_dist);), 2,
- "invalid multinomial distribution (sum of probabilities <= 0)");
- /* normalize cumulative probability distribution so that last val is 1
- i.e. doesn't assume original prob_dist row sums to one */
- if ( (sum > 0) || ( ( sum < 1.00001) && (sum > 0.99999) ) )
- {
- for (j=0; j<n_categories; j++)
- {
- THDoubleTensor_data(cum_dist)[j*cum_dist->stride[0]] /= sum;
- }
- }
-
- for (j=0; j<n_sample; j++)
- {
- /* sample a probability mass from a uniform distribution */
- double uniform_sample = THRandom_uniform(_generator, 0, 1);
- /* Do a binary search for the slot in which the prob falls
- ie cum_dist[row][slot-1] < uniform_prob < cum_distr[row][slot] */
- int left_pointer = 0;
- int right_pointer = n_categories;
- int mid_pointer;
- double cum_prob;
- int sample_idx;
- /* Make sure the last cumulative distribution bucket sums to 1 */
- THDoubleTensor_data(cum_dist)[(n_categories-1)*cum_dist->stride[0]] = 1;
-
- while(right_pointer - left_pointer > 0)
- {
- mid_pointer = left_pointer + (right_pointer - left_pointer) / 2;
- cum_prob = THDoubleStorage_get( \
- cum_dist->storage, \
- cum_dist->storageOffset+mid_pointer*cum_dist->stride[0] \
- );
- if (cum_prob < uniform_sample)
- {
- left_pointer = mid_pointer + 1;
- }
- else
- {
- right_pointer = mid_pointer;
- }
- }
- sample_idx = left_pointer;
-
- /* store in result tensor (will be incremented for lua compat by wrapper) */
- THLongStorage_set( \
- self->storage, \
- self->storageOffset+i*self->stride[0]+j*self->stride[1], \
- sample_idx \
- );
-
- /* Once a sample is drawn, it cannot be drawn again. ie sample without replacement */
- if (!with_replacement)
- {
- /* update cumulative distribution so that sample cannot be drawn again */
- double diff;
- double new_val = 0;
- double sum;
-
- if (sample_idx != 0)
- {
- new_val = THDoubleStorage_get( \
- cum_dist->storage, \
- cum_dist->storageOffset+(sample_idx-1)*cum_dist->stride[0] \
- );
- }
- /* marginal cumulative mass (i.e. original probability) of sample */
- diff = THDoubleStorage_get( \
- cum_dist->storage, \
- cum_dist->storageOffset+sample_idx*cum_dist->stride[0] \
- ) - new_val;
- /* new sum of marginals is not one anymore... */
- sum = 1.0 - diff;
- for (k=0; k<n_categories; k++)
- {
- new_val = THDoubleStorage_get( \
- cum_dist->storage, \
- cum_dist->storageOffset+k*cum_dist->stride[0] \
- );
- if (k >= sample_idx)
- {
- /* remove sampled probability mass from later cumulative probabilities */
- new_val -= diff;
- }
- /* make total marginals sum to one */
- new_val /= sum;
- THDoubleStorage_set( \
- cum_dist->storage, \
- cum_dist->storageOffset+k*cum_dist->stride[0], \
- new_val \
- );
- }
- }
- }
- }
-
- THDoubleTensor_free(cum_dist);
-
- if (start_dim == 1)
- {
- THLongTensor_resize1d(self, n_sample);
- THTensor_(resize1d)(prob_dist, n_categories);
- }
-}
-
-#endif
-
-#if defined(TH_REAL_IS_BYTE)
-void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self)
-{
- static const size_t size = sizeof(THGenerator);
- THGenerator *rng_state;
- THTensor_(resize1d)(self, size);
- THArgCheck(THTensor_(nElement)(self) == size, 1, "RNG state is wrong size");
- THArgCheck(THTensor_(isContiguous)(self), 1, "RNG state needs to be contiguous");
- rng_state = (THGenerator *)THTensor_(data)(self);
- THGenerator_copy(rng_state, _generator);
-}
-
-void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self)
-{
- static const size_t size = sizeof(THGenerator);
- THGenerator *rng_state;
- THArgCheck(THTensor_(nElement)(self) == size, 1, "RNG state is wrong size");
- THArgCheck(THTensor_(isContiguous)(self), 1, "RNG state needs to be contiguous");
- rng_state = (THGenerator *)THTensor_(data)(self);
- THArgCheck(THGenerator_isValid(rng_state), 1, "Invalid RNG state");
- THGenerator_copy(_generator, rng_state);
-}
-#endif
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h
deleted file mode 100644
index d20514242..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THTensorRandom.h"
-#else
-
-TH_API void THTensor_(random)(THTensor *self, THGenerator *_generator);
-TH_API void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p);
-TH_API void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p);
-TH_API void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p);
-TH_API void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p);
-
-#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
-TH_API void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b);
-TH_API void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv);
-TH_API void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda);
-TH_API void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma);
-TH_API void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv);
-TH_API void THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement);
-#endif
-
-#if defined(TH_REAL_IS_BYTE)
-TH_API void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self);
-TH_API void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self);
-#endif
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THVector.h b/contrib/lua-torch/torch7/lib/TH/generic/THVector.h
deleted file mode 100644
index 7d368541a..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THVector.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THVector.h"
-#else
-
-TH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n);
-TH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n);
-TH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n);
-TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n);
-TH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n);
-TH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n);
-TH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n);
-TH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n);
-
-/* Initialize the dispatch pointers */
-TH_API void THVector_(vectorDispatchInit)(void);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c b/contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c
deleted file mode 100644
index 3388e0d9b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c
+++ /dev/null
@@ -1,131 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THVectorDefault.c"
-#else
-
-void THVector_(copy_DEFAULT)(real *x, const real *y, const ptrdiff_t n) {
- ptrdiff_t i = 0;
-
- for(; i <n-4; i+=4)
- {
- x[i] = y[i];
- x[i+1] = y[i+1];
- x[i+2] = y[i+2];
- x[i+3] = y[i+3];
- }
-
- for(; i < n; i++)
- x[i] = y[i];
-}
-
-void THVector_(fill_DEFAULT)(real *x, const real c, const ptrdiff_t n) {
- ptrdiff_t i = 0;
-
- for(; i <n-4; i+=4)
- {
- x[i] = c;
- x[i+1] = c;
- x[i+2] = c;
- x[i+3] = c;
- }
-
- for(; i < n; i++)
- x[i] = c;
-}
-
-void THVector_(cadd_DEFAULT)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n)
-{
- ptrdiff_t i = 0;
-
- for(; i<n-4; i+=4)
- {
- z[i] = x[i] + c * y[i];
- z[i+1] = x[i+1] + c * y[i+1];
- z[i+2] = x[i+2] + c * y[i+2];
- z[i+3] = x[i+3] + c * y[i+3];
- }
-
- for(; i<n; i++)
- z[i] = x[i] + c * y[i];
-}
-
-void THVector_(adds_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
-{
- ptrdiff_t i = 0;
-
- for(; i<n-4; i+=4)
- {
- y[i] = x[i] + c;
- y[i+1] = x[i+1] + c;
- y[i+2] = x[i+2] + c;
- y[i+3] = x[i+3] + c;
- }
-
- for(; i<n; i++)
- y[i] = x[i] + c;
-}
-
-void THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n)
-{
- ptrdiff_t i = 0;
-
- for(; i <n-4; i+=4)
- {
- z[i] = x[i] * y[i];
- z[i+1] = x[i+1] * y[i+1];
- z[i+2] = x[i+2] * y[i+2];
- z[i+3] = x[i+3] * y[i+3];
- }
-
- for(; i < n; i++)
- z[i] = x[i] * y[i];
-}
-
-void THVector_(muls_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
-{
- ptrdiff_t i = 0;
-
- for(; i <n-4; i+=4)
- {
- y[i] = x[i] * c;
- y[i+1] = x[i+1] * c;
- y[i+2] = x[i+2] * c;
- y[i+3] = x[i+3] * c;
- }
-
- for(; i < n; i++)
- y[i] = x[i] * c;
-}
-
-void THVector_(cdiv_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n)
-{
- ptrdiff_t i = 0;
-
- for(; i<n-4; i+=4)
- {
- z[i] = x[i] / y[i];
- z[i+1] = x[i+1] / y[i+1];
- z[i+2] = x[i+2] / y[i+2];
- z[i+3] = x[i+3] / y[i+3];
- }
-
- for(; i < n; i++)
- z[i] = x[i] / y[i];
-}
-
-void THVector_(divs_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)
-{
- ptrdiff_t i = 0;
-
- for(; i<n-4; i+=4)
- {
- y[i] = x[i] / c;
- y[i+1] = x[i+1] / c;
- y[i+2] = x[i+2] / c;
- y[i+3] = x[i+3] / c;
- }
-
- for(; i < n; i++)
- y[i] = x[i] / c;
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c b/contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c
deleted file mode 100644
index 5b8885283..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c
+++ /dev/null
@@ -1,262 +0,0 @@
-#ifndef TH_GENERIC_FILE
-#define TH_GENERIC_FILE "generic/THVectorDispatch.c"
-#else
-
-/* For now there are only SIMD implementations for FLOAT and DOUBLE.
- * Hopefully in the future this can be made totally generic (e.g, there are SIMD implementations
- * for a lot of functions */
-/* Each function with multiple implementations has:
- * 1. A DISPATCHPTR which will be initialized to point to the best available implementation for the host
- * 2. A DISPATCHTABLE which holds pointers to each implementation of a function, and a value indicating
- * which SIMD extension a given implementation uses
- * 3. A dispatch stub, which is what is actually called by clients, that simply wraps the dispatch pointer.
- */
-
-static void (*THVector_(fill_DISPATCHPTR))(real *, const real, const ptrdiff_t) = &THVector_(fill_DEFAULT);
-static FunctionDescription THVector_(fill_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(fill_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(__PPC64__)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(fill_VSX), SIMDExtension_VSX),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(fill_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(fill_SSE), SIMDExtension_SSE),
- #endif
- #endif
- FUNCTION_IMPL(THVector_(fill_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(fill)(real *x, const real c, const ptrdiff_t n) {
- THVector_(fill_DISPATCHPTR)(x, c, n);
-}
-
-static void (*THVector_(cadd_DISPATCHPTR))(real *, const real *, const real *, const real, const ptrdiff_t) = &THVector_(cadd_DEFAULT);
-static FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cadd_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(USE_AVX2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cadd_AVX2), SIMDExtension_AVX2),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cadd_SSE), SIMDExtension_SSE),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(cadd_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n) {
- THVector_(cadd_DISPATCHPTR)(z, x, y, c, n);
-}
-
-static void (*THVector_(adds_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(adds_DEFAULT);
-static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(adds_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(__PPC64__)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(adds_VSX), SIMDExtension_VSX),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(adds_SSE), SIMDExtension_SSE),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(adds_DEFAULT), SIMDExtension_DEFAULT)
-};
-// Dispatch stubs that just call the pointers
-TH_API void THVector_(adds)(real *r_, const real *t, const real value, const ptrdiff_t n) {
- THVector_(adds_DISPATCHPTR)(r_, t, value, n);
-}
-
-static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);
-static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cmul_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cmul_SSE), SIMDExtension_SSE),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) {
- THVector_(cmul_DISPATCHPTR)(z, x, y, n);
-}
-
-static void (*THVector_(muls_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(muls_DEFAULT);
-static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(muls_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(__PPC64__)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(muls_VSX), SIMDExtension_VSX),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(muls_SSE), SIMDExtension_SSE),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(muls_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n) {
- THVector_(muls_DISPATCHPTR)(y, x, c, n);
-}
-
-static void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cdiv_DEFAULT);
-static FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cdiv_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(cdiv_SSE), SIMDExtension_SSE),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(cdiv_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n) {
- THVector_(cdiv_DISPATCHPTR)(z, x, y, n);
-}
-
-static void (*THVector_(divs_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(divs_DEFAULT);
-static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {
- #if defined(__NEON__)
- #if defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(divs_NEON), SIMDExtension_NEON),
- #endif
- #endif
-
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \
- || defined(USE_SSE4_1) || defined(USE_SSE4_2)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(divs_SSE), SIMDExtension_SSE),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(divs_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {
- THVector_(divs_DISPATCHPTR)(y, x, c, n);
-}
-
-static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);
-static FunctionDescription THVector_(copy_DISPATCHTABLE)[] = {
- #if defined(USE_AVX)
- #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
- FUNCTION_IMPL(THVector_(copy_AVX), SIMDExtension_AVX),
- #endif
- #endif
-
- FUNCTION_IMPL(THVector_(copy_DEFAULT), SIMDExtension_DEFAULT)
-};
-void THVector_(copy)(real *y, const real *x, const ptrdiff_t n) {
- THVector_(copy_DISPATCHPTR)(y, x, n);
-}
-
-/* This needs to be called in order to initialize the dispatch pointers at runtime.
- * This function simply checks what SIMD extensions are available, and then walks the dispatch table
- * to choose the best function.
- * NOTE: As implemented, it will initialize the dispatch pointer to the first supported function.
- * This means that in the dispatch tables, implementations supporting more recent extensions
- * need to come first
- */
-void THVector_(vectorDispatchInit)(void)
-{
- uint32_t hostSimdExts = detectHostSIMDExtensions();
- INIT_DISPATCH_PTR(fill);
- INIT_DISPATCH_PTR(cadd);
- INIT_DISPATCH_PTR(adds);
- INIT_DISPATCH_PTR(cmul);
- INIT_DISPATCH_PTR(muls);
- INIT_DISPATCH_PTR(cdiv);
- INIT_DISPATCH_PTR(divs);
- INIT_DISPATCH_PTR(copy);
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h b/contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h
deleted file mode 100644
index 425b4b96e..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h
+++ /dev/null
@@ -1,395 +0,0 @@
-#ifndef COMMON_SIMD_H
-#define COMMON_SIMD_H
-
-/* Weights */
-#define LOAD_WEIGHT(q, simd_type, inst_var) _m ## simd_type ## inst_var(*(q))
-
-#define DECLARE_WEIGHTS(simd_type) \
-__ ## simd_type weight0; \
-__ ## simd_type weight1; \
-__ ## simd_type weight2; \
-__ ## simd_type weight3; \
-__ ## simd_type weight4;
-
-#define LOAD_WEIGHTS(k, simd_type, inst_var) \
-weight0 = LOAD_WEIGHT(weight + 5 * 0 + k, simd_type, inst_var); \
-weight1 = LOAD_WEIGHT(weight + 5 * 1 + k, simd_type, inst_var); \
-weight2 = LOAD_WEIGHT(weight + 5 * 2 + k, simd_type, inst_var); \
-weight3 = LOAD_WEIGHT(weight + 5 * 3 + k, simd_type, inst_var); \
-weight4 = LOAD_WEIGHT(weight + 5 * 4 + k, simd_type, inst_var);
-
-/* Inputs declare */
-#define DECLARE_INPUT_0(i) \
-float* input0 = image + i; \
-
-#define DECLARE_INPUT_1() \
-float* input1 = input0 + inputStride; \
-float* input2 = input1 + inputStride; \
-float* input3 = input2 + inputStride; \
-float* input4 = input3 + inputStride;
-
-#define DECLARE_INPUT_2() \
-DECLARE_INPUT_1() \
-float* input5 = input4 + inputStride;
-
-#define DECLARE_INPUT_4() \
-DECLARE_INPUT_2() \
-float* input6 = input5 + inputStride; \
-float* input7 = input6 + inputStride;
-
-#define DECLARE_INPUT_5() \
-DECLARE_INPUT_4() \
-float* input8 = input7 + inputStride;
-
-#define DECLARE_INPUT_6() \
-DECLARE_INPUT_5() \
-float* input9 = input8 + inputStride;
-
-#define DECLARE_INPUT_7() \
-DECLARE_INPUT_6() \
-float* inputA = input9 + inputStride;
-
-#define DECLARE_INPUT_8() \
-DECLARE_INPUT_7() \
-float* inputB = inputA + inputStride;
-
-
-/* Inputs increment */
-#define INC_INPUT_1()\
-input0++; \
-input1++; \
-input2++; \
-input3++; \
-input4++; \
-
-#define INC_INPUT_2()\
-INC_INPUT_1() \
-input5++;
-
-#define INC_INPUT_4()\
-INC_INPUT_2() \
-input6++; \
-input7++;
-
-#define INC_INPUT_5()\
-INC_INPUT_4() \
-input8++;
-
-#define INC_INPUT_6()\
-INC_INPUT_5() \
-input9++;
-
-#define INC_INPUT_7()\
-INC_INPUT_6() \
-inputA++;
-
-#define INC_INPUT_8()\
-INC_INPUT_7() \
-inputB++;
-
-/* Outputs declare */
-#define DECLARE_OUTPUT_1() \
-float* output0 = output;
-
-#define DECLARE_OUTPUT_2() \
-DECLARE_OUTPUT_1() \
-float* output1 = output0 + outputStride;
-
-#define DECLARE_OUTPUT_4() \
-DECLARE_OUTPUT_2() \
-float* output2 = output1 + outputStride; \
-float* output3 = output2 + outputStride;
-
-#define DECLARE_OUTPUT_5() \
-DECLARE_OUTPUT_4() \
-float* output4 = output3 + outputStride;
-
-#define DECLARE_OUTPUT_6() \
-DECLARE_OUTPUT_5() \
-float* output5 = output4 + outputStride;
-
-#define DECLARE_OUTPUT_7() \
-DECLARE_OUTPUT_6() \
-float* output6 = output5 + outputStride;
-
-#define DECLARE_OUTPUT_8() \
-DECLARE_OUTPUT_7() \
-float* output7 = output6 + outputStride;
-
-/* Outputs increment */
-#define INC_OUTPUT_1(x) \
-output0 += x;
-
-#define INC_OUTPUT_2(x) \
-INC_OUTPUT_1(x) \
-output1 += x;
-
-#define INC_OUTPUT_4(x) \
-INC_OUTPUT_2(x) \
-output2 += x; \
-output3 += x;
-
-#define INC_OUTPUT_5(x) \
-INC_OUTPUT_4(x) \
-output4 += x;
-
-#define INC_OUTPUT_6(x) \
-INC_OUTPUT_5(x) \
-output5 += x;
-
-#define INC_OUTPUT_7(x) \
-INC_OUTPUT_6(x) \
-output6 += x;
-
-#define INC_OUTPUT_8(x) \
-INC_OUTPUT_7(x) \
-output7 += x;
-
-/* Image declare */
-#define DECLARE_IMAGE_1(simd_type) \
-__ ## simd_type image0; \
-__ ## simd_type image1; \
-__ ## simd_type image2; \
-__ ## simd_type image3; \
-__ ## simd_type image4;
-
-#define DECLARE_IMAGE_2(simd_type) \
-DECLARE_IMAGE_1(simd_type) \
-__ ## simd_type image5;
-
-#define DECLARE_IMAGE_4(simd_type) \
-DECLARE_IMAGE_2(simd_type) \
-__ ## simd_type image6; \
-__ ## simd_type image7;
-
-#define DECLARE_IMAGE_5(simd_type) \
-DECLARE_IMAGE_4(simd_type) \
-__ ## simd_type image8;
-
-#define DECLARE_IMAGE_6(simd_type) \
-DECLARE_IMAGE_5(simd_type) \
-__ ## simd_type image9;
-
-#define DECLARE_IMAGE_7(simd_type) \
-DECLARE_IMAGE_6(simd_type) \
-__ ## simd_type imageA;
-
-#define DECLARE_IMAGE_8(simd_type) \
-DECLARE_IMAGE_7(simd_type) \
-__ ## simd_type imageB;
-
-/* Sums declare */
-#define DECLARE_SUM_1(simd_type) \
-__ ## simd_type sum0;
-
-#define DECLARE_SUM_2(simd_type) \
-DECLARE_SUM_1(simd_type) \
-__ ## simd_type sum1;
-
-#define DECLARE_SUM_4(simd_type) \
-DECLARE_SUM_2(simd_type) \
-__ ## simd_type sum2; \
-__ ## simd_type sum3;
-
-#define DECLARE_SUM_5(simd_type) \
-DECLARE_SUM_4(simd_type) \
-__ ## simd_type sum4;
-
-#define DECLARE_SUM_6(simd_type) \
-DECLARE_SUM_5(simd_type) \
-__ ## simd_type sum5;
-
-#define DECLARE_SUM_7(simd_type) \
-DECLARE_SUM_6(simd_type) \
-__ ## simd_type sum6;
-
-#define DECLARE_SUM_8(simd_type) \
-DECLARE_SUM_7(simd_type) \
-__ ## simd_type sum7;
-
-/* Sums load */
-#define LOAD_SUM_1(simd_type) \
-sum0 = _m ## simd_type ## _loadu_ps(output0);
-
-#define LOAD_SUM_2(simd_type) \
-LOAD_SUM_1(simd_type) \
-sum1 = _m ## simd_type ## _loadu_ps(output1);
-
-#define LOAD_SUM_4(simd_type) \
-LOAD_SUM_2(simd_type) \
-sum2 = _m ## simd_type ## _loadu_ps(output2); \
-sum3 = _m ## simd_type ## _loadu_ps(output3);
-
-#define LOAD_SUM_5(simd_type) \
-LOAD_SUM_4(simd_type) \
-sum4 = _m ## simd_type ## _loadu_ps(output4);
-
-#define LOAD_SUM_6(simd_type) \
-LOAD_SUM_5(simd_type) \
-sum5 = _m ## simd_type ## _loadu_ps(output5);
-
-#define LOAD_SUM_7(simd_type) \
-LOAD_SUM_6(simd_type) \
-sum6 = _m ## simd_type ## _loadu_ps(output6);
-
-#define LOAD_SUM_8(simd_type) \
-LOAD_SUM_7(simd_type) \
-sum7 = _m ## simd_type ## _loadu_ps(output7);
-
-/* Sums store */
-#define STORE_SUM_1(simd_type) \
-_m ## simd_type ## _storeu_ps(output0, sum0);
-
-#define STORE_SUM_2(simd_type) \
-STORE_SUM_1(simd_type) \
-_m ## simd_type ## _storeu_ps(output1, sum1);
-
-#define STORE_SUM_4(simd_type) \
-STORE_SUM_2(simd_type) \
-_m ## simd_type ## _storeu_ps(output2, sum2); \
-_m ## simd_type ## _storeu_ps(output3, sum3);
-
-#define STORE_SUM_5(simd_type) \
-STORE_SUM_4(simd_type) \
-_m ## simd_type ## _storeu_ps(output4, sum4);
-
-#define STORE_SUM_6(simd_type) \
-STORE_SUM_5(simd_type) \
-_m ## simd_type ## _storeu_ps(output5, sum5);
-
-#define STORE_SUM_7(simd_type) \
-STORE_SUM_6(simd_type) \
-_m ## simd_type ## _storeu_ps(output6, sum6);
-
-#define STORE_SUM_8(simd_type) \
-STORE_SUM_7(simd_type) \
-_m ## simd_type ## _storeu_ps(output7, sum7);
-
-/* Convolution */
-#define CONVOLVE_1ROWS(simd_type) \
-image0 = _m ## simd_type ## _loadu_ps(input0); \
-image1 = _m ## simd_type ## _loadu_ps(input1); \
-image2 = _m ## simd_type ## _loadu_ps(input2); \
-image3 = _m ## simd_type ## _loadu_ps(input3); \
-image4 = _m ## simd_type ## _loadu_ps(input4); \
-\
-sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight0, image0)); \
-sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight1, image1)); \
-sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight2, image2)); \
-sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight3, image3)); \
-sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight4, image4));
-
-#define CONVOLVE_2ROWS(simd_type) \
-CONVOLVE_1ROWS(simd_type) \
-image5 = _m ## simd_type ## _loadu_ps(input5); \
-sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight0, image1)); \
-sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight1, image2)); \
-sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight2, image3)); \
-sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight3, image4)); \
-sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight4, image5));
-
-#define CONVOLVE_4ROWS(simd_type) \
-CONVOLVE_2ROWS(simd_type) \
-image6 = _m ## simd_type ## _loadu_ps(input6); \
-sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight0, image2)); \
-sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight1, image3)); \
-sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight2, image4)); \
-sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight3, image5)); \
-sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight4, image6)); \
-\
-image7 = _m ## simd_type ## _loadu_ps(input7); \
-sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight0, image3)); \
-sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight1, image4)); \
-sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight2, image5)); \
-sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight3, image6)); \
-sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight4, image7));
-
-#define CONVOLVE_5ROWS(simd_type) \
-CONVOLVE_4ROWS(simd_type) \
-image8 = _m ## simd_type ## _loadu_ps(input8); \
-sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight0, image4)); \
-sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight1, image5)); \
-sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight2, image6)); \
-sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight3, image7)); \
-sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight4, image8));
-
-#define CONVOLVE_6ROWS(simd_type) \
-CONVOLVE_5ROWS(simd_type) \
-image9 = _m ## simd_type ## _loadu_ps(input9); \
-sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight0, image5)); \
-sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight1, image6)); \
-sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight2, image7)); \
-sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight3, image8)); \
-sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight4, image9));
-
-#define CONVOLVE_7ROWS(simd_type) \
-CONVOLVE_6ROWS(simd_type) \
-imageA = _m ## simd_type ## _loadu_ps(inputA); \
-sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight0, image6)); \
-sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight1, image7)); \
-sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight2, image8)); \
-sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight3, image9)); \
-sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight4, imageA));
-
-#define CONVOLVE_8ROWS(simd_type) \
-CONVOLVE_7ROWS(simd_type) \
-imageB = _m ## simd_type ## _loadu_ps(inputB); \
-sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight0, image7)); \
-sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight1, image8)); \
-sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight2, image9)); \
-sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight3, imageA)); \
-sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight4, imageB));
-
-/* Convolution MEGA macro */
-#define DECLARE_SUMX(rows) DECLARE_SUM_ ## rows
-#define LOAD_SUMX(rows) LOAD_SUM_ ## rows
-#define DECLARE_INPUTX(rows) DECLARE_INPUT_ ## rows
-#define DECLARE_IMAGEX(rows) DECLARE_IMAGE_ ## rows
-#define CONVOLVEX(rows) CONVOLVE_ ## rows ## ROWS
-#define INC_INPUTX(rows) INC_INPUT_ ## rows
-#define STORE_SUMX(rows) STORE_SUM_ ## rows
-#define INC_OUTPUTX(rows) INC_OUTPUT_ ## rows
-
-#define CONVOLUTION_LOOP(rows, simd_type, simd_inst_prefex, simd_set, i) \
-DECLARE_SUMX(rows)(simd_type) \
-LOAD_SUMX(rows)(simd_inst_prefex) \
-DECLARE_WEIGHTS(simd_type) \
-DECLARE_INPUT_0(i) \
-DECLARE_INPUTX(rows)() \
-DECLARE_IMAGEX(rows)(simd_type) \
-\
-LOAD_WEIGHTS(0, simd_inst_prefex, simd_set) \
-CONVOLVEX(rows)(simd_inst_prefex) \
-INC_INPUTX(rows)() \
-\
-LOAD_WEIGHTS(1, simd_inst_prefex, simd_set) \
-CONVOLVEX(rows)(simd_inst_prefex) \
-INC_INPUTX(rows)() \
-\
-LOAD_WEIGHTS(2, simd_inst_prefex, simd_set) \
-CONVOLVEX(rows)(simd_inst_prefex) \
-INC_INPUTX(rows)() \
-\
-LOAD_WEIGHTS(3, simd_inst_prefex, simd_set) \
-CONVOLVEX(rows)(simd_inst_prefex) \
-INC_INPUTX(rows)() \
-\
-LOAD_WEIGHTS(4, simd_inst_prefex, simd_set) \
-CONVOLVEX(rows)(simd_inst_prefex) \
-\
-STORE_SUMX(rows)(simd_inst_prefex) \
-\
-INC_OUTPUTX(rows)(sizeof(__ ## simd_type) / sizeof(float))
-
-
-#define CONVOLVE_8COLS_XROWS(rows, i) \
-{ \
-CONVOLUTION_LOOP(rows, m256, m256, _set1_ps, i) \
-}
-
-#define CONVOLVE_4COLS_XROWS(rows, i) \
-{ \
-CONVOLUTION_LOOP(rows, m128, m, _set_ps1, i) \
-}
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c
deleted file mode 100644
index da7a4bb20..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c
+++ /dev/null
@@ -1,127 +0,0 @@
-#if defined(USE_AVX) && defined(__AVX__)
-
-#ifdef _MSC_VER
-#include <intrin.h>
-
-static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
- unsigned int *__ebx, unsigned int *__ecx,
- unsigned int *__edx) {
- unsigned int cpui[4];
- __cpuid(cpui, __level);
- *__eax = cpui[0]; *__ebx = cpui[1]; *__ecx = cpui[2]; *__edx = cpui[3];
- return 1;
-}
-
-static void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) {
- *eax = 0; *edx = 0;
- if (op == 0)
- *eax = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-}
-
-#else
-
-#if __i386__
-#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
-__asm(" pushl %%ebx\n" \
-" cpuid\n" \
-" mov %%ebx,%1\n" \
-" popl %%ebx" \
-: "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
-: "0"(__level))
-#else
-#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
-__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
-: "0"(__level))
-#endif
-
-static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
- unsigned int *__ebx, unsigned int *__ecx,
- unsigned int *__edx) {
- __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx);
- return 1;
-}
-
-static void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) {
- __asm__ __volatile__
- (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
-}
-
-#endif
-
-enum ECPUFeature
-{
- kCPUFeature_SSE = 0x01,
- kCPUFeature_SSE2 = 0x02,
- kCPUFeature_SSE3 = 0x04,
- kCPUFeature_SSE3_S = 0x08,
- kCPUFeature_SSE4_1 = 0x10,
- kCPUFeature_SSE4_2 = 0x20,
- kCPUFeature_AVX = 0x40
-};
-
-static unsigned int checkCPUFeatures() {
- unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- unsigned int features = 0;
- __get_cpuid(1, &eax, &ebx, &ecx, &edx);
- if( (edx & (1 << 25)) != 0 ) {
- features |= kCPUFeature_SSE;
- }
- if( (edx & (1 << 26)) != 0 ) {
- features |= kCPUFeature_SSE2;
- }
- if( (ecx & (1 << 0)) != 0 ) {
- features |= kCPUFeature_SSE3;
- }
- if( (ecx & (1 << 9)) != 0 ) {
- features |= kCPUFeature_SSE3_S;
- }
- if( (ecx & (1 << 19)) != 0 ) {
- features |= kCPUFeature_SSE4_1;
- }
- if( (ecx & (1 << 20)) != 0 ) {
- features |= kCPUFeature_SSE4_2;
- }
- if( (ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0 ) {
- xgetbv(0, &eax, &edx);
- if( (eax & 6) == 6 ) {
- features |= kCPUFeature_AVX;
- }
- }
- return features;
-}
-
-#include <stdio.h>
-
-static int haveCPUFeature(unsigned int feature) {
- static unsigned int sCPUFeatures = 0;
- static int sDetectedCPUFeatures = 0;
- if (!sDetectedCPUFeatures) {
- sDetectedCPUFeatures = 1;
- sCPUFeatures = checkCPUFeatures();
- if ((sCPUFeatures & kCPUFeature_AVX) != 0) {
- printf("torch running avx\n");
- } else {
- printf("torch running sse \n");
- }
- }
- return (sCPUFeatures & feature) != 0;
-}
-
-#endif
-
-void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);
-void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);
-
-void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) {
-#if defined(USE_AVX) && defined(__AVX__)
- int avx = haveCPUFeature(kCPUFeature_AVX);
- if (avx)
- {
- convolve_5x5_avx(output, input, kernel, outRows, outCols, outCols, inCols);
- }
- else
-#endif
- {
- convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols);
- }
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h
deleted file mode 100644
index 7b9b04c50..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h
+++ /dev/null
@@ -1 +0,0 @@
-void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols); \ No newline at end of file
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c
deleted file mode 100644
index 52b6d0ffb..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <immintrin.h>
-#include "common_simd.h"
-
-#define CLEAR_AVX() _mm256_zeroupper()
-
-void convolve_5x5_1_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_1()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(1, i)
- }
-}
-
-void convolve_5x5_2_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_2()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(2, i)
- }
-}
-
-void convolve_5x5_4_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_4()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(4, i)
- }
-}
-
-void convolve_5x5_5_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_5()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(5, i)
- }
-}
-
-void convolve_5x5_6_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_6()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(6, i)
- }
-}
-
-void convolve_5x5_7_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_7()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(7, i)
- }
-}
-
-void convolve_5x5_8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount = count & 0xFFFFFFF8;
- DECLARE_OUTPUT_8()
- for (; i < alignedCount; i+=8) {
- CONVOLVE_8COLS_XROWS(8, i)
- }
-}
-
-void convolve_5x5_64x64_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- for(int i = 0; i < 60; i+=6)
- {
- DECLARE_OUTPUT_6()
- CONVOLVE_8COLS_XROWS(6, 0)
- CONVOLVE_8COLS_XROWS(6, 8)
- CONVOLVE_8COLS_XROWS(6, 16)
- CONVOLVE_8COLS_XROWS(6, 24)
- CONVOLVE_8COLS_XROWS(6, 32)
- CONVOLVE_8COLS_XROWS(6, 40)
- CONVOLVE_8COLS_XROWS(6, 48)
- CONVOLVE_8COLS_XROWS(6, 56)
- output += outputStride * 6;
- image += inputStride * 6;
- }
- DECLARE_OUTPUT_4()
- CONVOLVE_8COLS_XROWS(4, 0)
- CONVOLVE_8COLS_XROWS(4, 8)
- CONVOLVE_8COLS_XROWS(4, 16)
- CONVOLVE_8COLS_XROWS(4, 24)
- CONVOLVE_8COLS_XROWS(4, 32)
- CONVOLVE_8COLS_XROWS(4, 40)
- CONVOLVE_8COLS_XROWS(4, 48)
- CONVOLVE_8COLS_XROWS(4, 56)
-}
-
-void convolve_5x5_32x32_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- for(int i = 0; i < 30; i+=6)
- {
- DECLARE_OUTPUT_6()
- CONVOLVE_8COLS_XROWS(6, 0)
- CONVOLVE_8COLS_XROWS(6, 8)
- CONVOLVE_8COLS_XROWS(6, 16)
- CONVOLVE_8COLS_XROWS(6, 24)
- output += outputStride * 6;
- image += inputStride * 6;
- }
- DECLARE_OUTPUT_2()
- CONVOLVE_8COLS_XROWS(2, 0)
- CONVOLVE_8COLS_XROWS(2, 8)
- CONVOLVE_8COLS_XROWS(2, 16)
- CONVOLVE_8COLS_XROWS(2, 24)
-}
-
-void convolve_5x5_16x16_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- for(int i = 0; i < 12; i+=6)
- {
- DECLARE_OUTPUT_6()
- CONVOLVE_8COLS_XROWS(6, 0)
- CONVOLVE_8COLS_XROWS(6, 8)
- output += outputStride * 6;
- image += inputStride * 6;
- }
- DECLARE_OUTPUT_4()
- CONVOLVE_8COLS_XROWS(4, 0)
- CONVOLVE_8COLS_XROWS(4, 8)
-}
-
-void convolve_5x5_8x8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- DECLARE_OUTPUT_8()
- CONVOLVE_8COLS_XROWS(8, 0)
-}
-
-void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);
-
-void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) {
- long ic = inCols;
- long yy = 0;
- float* t_ = input;
- float* r_ = output;
- float* k_ = kernel;
-
- if((outRows == 64) && (outCols == 64)) {
- convolve_5x5_64x64_avx(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- if((outRows == 32) && (outCols == 32)) {
- convolve_5x5_32x32_avx(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- if((outRows == 16) && (outCols == 16)) {
- convolve_5x5_16x16_avx(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- if((outRows == 8) && (outCols == 8)) {
- convolve_5x5_8x8_avx(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- for(; yy < (outRows / 6 ) * 6; yy += 6) {
- float *pi_ = t_ + yy*ic;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_6_avx(r_, pis_, pw_, outCols, outStride, ic);
- r_ += (outStride * 6);
- }
-
- // more than 2 rows left to process and we ended up on a non-multiple of 4
- if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) {
- // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop)
- float *pi_ = t_ + yy*ic;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic);
- r_ += (outStride * 2);
- yy += 2;
- }
-
- for(; yy < (outRows & 0xFFFFFFFC); yy += 4) {
- float *pi_ = t_ + yy*ic;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_4_avx(r_, pis_, pw_, outCols, outStride, ic);
- r_ += (outStride * 4);
- }
-
- for(; yy < (outRows & 0xFFFFFFFE); yy += 2) {
- float *pi_ = t_ + yy*ic;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic);
- r_ += (outStride * 2);
- }
-
- for(; yy < outRows; yy += 1) {
- float *pi_ = t_ + yy*ic;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_1_avx(r_, pis_, pw_, outCols, outStride, ic);
- r_ += (outStride * 1);
- }
-
- long procCols = outCols & 0xFFFFFFF8; // avx version processes 8 cols at a time
- long remCols = outCols - procCols;
-
- //process the rest using sse
- if( remCols > 0) {
- CLEAR_AVX();
- convolve_5x5_sse(&output[procCols], &input[procCols], kernel, outRows, remCols, outStride, inCols);
- }
-} \ No newline at end of file
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c
deleted file mode 100644
index f34b79695..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c
+++ /dev/null
@@ -1,320 +0,0 @@
-#include <emmintrin.h>
-#include "common_simd.h"
-
-
-/* SSE variants */
-void convolve_5x5_1_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount4 = count & 0xFFFFFFFC;
- DECLARE_OUTPUT_1()
- for (; i < alignedCount4; i+=4) {
- CONVOLVE_4COLS_XROWS(1, i)
- }
- for (; i < (count); i++) {
- float output0 = output[i + outputStride * 0];
- int row;
- for (row = 0; row < 5; row++) {
- int col;
- for (col = 0; col < 5; col++) {
- output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];
- }
- }
- output[i + outputStride * 0] = output0;
- }
-}
-
-void convolve_5x5_2_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount4 = count & 0xFFFFFFFC;
- DECLARE_OUTPUT_2()
- for (; i < alignedCount4; i+=4) {
- CONVOLVE_4COLS_XROWS(2, i)
- }
- for (; i < (count); i++) {
- float output0 = output[i + outputStride * 0];
- float output1 = output[i + outputStride * 1];
- int row;
- for (row = 0; row < 5; row++) {
- int col;
- for (col = 0; col < 5; col++) {
- output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];
- output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];
- }
- }
- output[i + outputStride * 0] = output0;
- output[i + outputStride * 1] = output1;
- }
-}
-
-void convolve_5x5_4_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount4 = count & 0xFFFFFFFC;
- DECLARE_OUTPUT_4()
- for (; i < alignedCount4; i+=4) {
- CONVOLVE_4COLS_XROWS(4, i)
- }
- for (; i < (count); i++) {
- float output0 = output[i + outputStride * 0];
- float output1 = output[i + outputStride * 1];
- float output2 = output[i + outputStride * 2];
- float output3 = output[i + outputStride * 3];
- int row;
- for (row = 0; row < 5; row++) {
- int col;
- for (col = 0; col < 5; col++) {
- output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];
- output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];
- output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col];
- output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col];
- }
- }
- output[i + outputStride * 0] = output0;
- output[i + outputStride * 1] = output1;
- output[i + outputStride * 2] = output2;
- output[i + outputStride * 3] = output3;
- }
-}
-
-void convolve_5x5_6_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount4 = count & 0xFFFFFFFC;
- DECLARE_OUTPUT_6()
- for (; i < alignedCount4; i+=4) {
- CONVOLVE_4COLS_XROWS(6, i)
- }
- for (; i<(count); i++) {
- float output0 = output[i + outputStride * 0];
- float output1 = output[i + outputStride * 1];
- float output2 = output[i + outputStride * 2];
- float output3 = output[i + outputStride * 3];
- float output4 = output[i + outputStride * 4];
- float output5 = output[i + outputStride * 5];
- int row;
- for (row = 0; row < 5; row++) {
- int col;
- for (col = 0; col < 5; col++) {
- output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];
- output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];
- output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col];
- output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col];
- output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col];
- output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col];
- }
- }
- output[i + outputStride * 0] = output0;
- output[i + outputStride * 1] = output1;
- output[i + outputStride * 2] = output2;
- output[i + outputStride * 3] = output3;
- output[i + outputStride * 4] = output4;
- output[i + outputStride * 5] = output5;
- }
-}
-
-void convolve_5x5_8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- long i = 0;
- long alignedCount4 = count & 0xFFFFFFFC;
- DECLARE_OUTPUT_8()
- for (; i < alignedCount4; i+=4) {
- CONVOLVE_4COLS_XROWS(8, i)
- }
- for (; i<(count); i++) {
- float output0 = output[i + outputStride * 0];
- float output1 = output[i + outputStride * 1];
- float output2 = output[i + outputStride * 2];
- float output3 = output[i + outputStride * 3];
- float output4 = output[i + outputStride * 4];
- float output5 = output[i + outputStride * 5];
- float output6 = output[i + outputStride * 6];
- float output7 = output[i + outputStride * 7];
- int row;
- for (row = 0; row < 5; row++) {
- int col;
- for (col = 0; col < 5; col++) {
- output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];
- output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];
- output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col];
- output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col];
- output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col];
- output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col];
- output6 += weight[5 * row + col] * image[i + (row + 6) * inputStride + col];
- output7 += weight[5 * row + col] * image[i + (row + 7) * inputStride + col];
- }
- }
- output[i + outputStride * 0] = output0;
- output[i + outputStride * 1] = output1;
- output[i + outputStride * 2] = output2;
- output[i + outputStride * 3] = output3;
- output[i + outputStride * 4] = output4;
- output[i + outputStride * 5] = output5;
- output[i + outputStride * 6] = output6;
- output[i + outputStride * 7] = output7;
- }
-}
-
-#define UNROLL_SSE_CONVOLUTION 0
-#if (UNROLL_SSE_CONVOLUTION)
-
-void convolve_5x5_64x64_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- for(int i = 0; i < 60; i+=6)
- {
- DECLARE_OUTPUT_6()
- CONVOLVE_4COLS_XROWS(6, 0)
- CONVOLVE_4COLS_XROWS(6, 4)
- CONVOLVE_4COLS_XROWS(6, 8)
- CONVOLVE_4COLS_XROWS(6, 12)
- CONVOLVE_4COLS_XROWS(6, 16)
- CONVOLVE_4COLS_XROWS(6, 20)
- CONVOLVE_4COLS_XROWS(6, 24)
- CONVOLVE_4COLS_XROWS(6, 28)
- CONVOLVE_4COLS_XROWS(6, 32)
- CONVOLVE_4COLS_XROWS(6, 36)
- CONVOLVE_4COLS_XROWS(6, 40)
- CONVOLVE_4COLS_XROWS(6, 44)
- CONVOLVE_4COLS_XROWS(6, 48)
- CONVOLVE_4COLS_XROWS(6, 52)
- CONVOLVE_4COLS_XROWS(6, 56)
- CONVOLVE_4COLS_XROWS(6, 60)
- output += outputStride * 6;
- image += inputStride * 6;
- }
- DECLARE_OUTPUT_4()
- CONVOLVE_4COLS_XROWS(4, 0)
- CONVOLVE_4COLS_XROWS(4, 4)
- CONVOLVE_4COLS_XROWS(4, 8)
- CONVOLVE_4COLS_XROWS(4, 12)
- CONVOLVE_4COLS_XROWS(4, 16)
- CONVOLVE_4COLS_XROWS(4, 20)
- CONVOLVE_4COLS_XROWS(4, 24)
- CONVOLVE_4COLS_XROWS(4, 28)
- CONVOLVE_4COLS_XROWS(4, 32)
- CONVOLVE_4COLS_XROWS(4, 36)
- CONVOLVE_4COLS_XROWS(4, 40)
- CONVOLVE_4COLS_XROWS(4, 44)
- CONVOLVE_4COLS_XROWS(4, 48)
- CONVOLVE_4COLS_XROWS(4, 52)
- CONVOLVE_4COLS_XROWS(4, 56)
- CONVOLVE_4COLS_XROWS(4, 60)
-}
-
-void convolve_5x5_32x32_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- for(int i = 0; i < 30; i+=6)
- {
- DECLARE_OUTPUT_6()
-
- CONVOLVE_4COLS_XROWS(6, 0)
- CONVOLVE_4COLS_XROWS(6, 4)
- CONVOLVE_4COLS_XROWS(6, 8)
- CONVOLVE_4COLS_XROWS(6, 12)
- CONVOLVE_4COLS_XROWS(6, 16)
- CONVOLVE_4COLS_XROWS(6, 20)
- CONVOLVE_4COLS_XROWS(6, 24)
- CONVOLVE_4COLS_XROWS(6, 28)
-
- output += outputStride * 6;
- image += inputStride * 6;
- }
- DECLARE_OUTPUT_2()
- CONVOLVE_4COLS_XROWS(2, 0)
- CONVOLVE_4COLS_XROWS(2, 4)
- CONVOLVE_4COLS_XROWS(2, 8)
- CONVOLVE_4COLS_XROWS(2, 12)
- CONVOLVE_4COLS_XROWS(2, 16)
- CONVOLVE_4COLS_XROWS(2, 20)
- CONVOLVE_4COLS_XROWS(2, 24)
- CONVOLVE_4COLS_XROWS(2, 28)
-}
-
-void convolve_5x5_16x16_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- for(int i = 0; i < 12; i+=6)
- {
- DECLARE_OUTPUT_6()
- CONVOLVE_4COLS_XROWS(6, 0)
- CONVOLVE_4COLS_XROWS(6, 4)
- CONVOLVE_4COLS_XROWS(6, 8)
- CONVOLVE_4COLS_XROWS(6, 12)
- output += outputStride * 6;
- image += inputStride * 6;
- }
- DECLARE_OUTPUT_4()
- CONVOLVE_4COLS_XROWS(4, 0)
- CONVOLVE_4COLS_XROWS(4, 4)
- CONVOLVE_4COLS_XROWS(4, 8)
- CONVOLVE_4COLS_XROWS(4, 12)
-}
-
-void convolve_5x5_8x8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {
- DECLARE_OUTPUT_8()
- CONVOLVE_4COLS_XROWS(8, 0)
- CONVOLVE_4COLS_XROWS(8, 4)
-}
-
-#endif
-
-void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) {
- long yy = 0;
- float* t_ = input;
- float* r_ = output;
- float* k_ = kernel;
-#if (UNROLL_SSE_CONVOLUTION)
- if((outRows == 64) && (outCols == 64)) {
- convolve_5x5_64x64_sse(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- if((outRows == 32) && (outCols == 32)) {
- convolve_5x5_32x32_sse(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- if((outRows == 16) && (outCols == 16)) {
- convolve_5x5_16x16_sse(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-
- if((outRows == 8) && (outCols == 8)) {
- convolve_5x5_8x8_sse(output, input, kernel, outRows, outStride, inCols);
- return;
- }
-#endif
- for(; yy < (outRows / 6 ) * 6; yy += 6) {
- float *pi_ = t_ + yy*inCols;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_6_sse(r_, pis_, pw_, outCols, outStride, inCols);
- r_ += (outStride * 6);
- }
- // more than 2 rows left to process and we ended up on a non-multiple of 4
- if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) {
- // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop)
- float *pi_ = t_ + yy*inCols;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols);
- r_ += (outStride * 2);
- yy += 2;
- }
-
- for(; yy < (outRows & 0xFFFFFFFC); yy += 4) {
- float *pi_ = t_ + yy*inCols;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_4_sse(r_, pis_, pw_, outCols, outStride, inCols);
- r_ += (outStride * 4);
- }
-
- for(; yy < (outRows & 0xFFFFFFFE); yy += 2) {
- float *pi_ = t_ + yy*inCols;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols);
- r_ += (outStride * 2);
- }
-
- for(; yy < outRows; yy += 1) {
- float *pi_ = t_ + yy*inCols;
- float *pw_ = k_;
- float *pis_ = pi_;
- convolve_5x5_1_sse(r_, pis_, pw_, outCols, outStride, inCols);
- r_ += (outStride * 1);
- }
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h b/contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h
deleted file mode 100644
index b1878ad5b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h
+++ /dev/null
@@ -1,165 +0,0 @@
-#ifndef TH_SIMD_INC
-#define TH_SIMD_INC
-
-#include <stdint.h>
-#include <stdlib.h>
-#if defined(_MSC_VER)
-#include <intrin.h>
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
-#include <cpuid.h>
-#endif
-
-// Can be found on Intel ISA Reference for CPUID
-#define CPUID_AVX2_BIT 0x20 // Bit 5 of EBX for EAX=0x7
-#define CPUID_AVX_BIT 0x10000000 // Bit 28 of ECX for EAX=0x1
-#define CPUID_SSE_BIT 0x2000000 // bit 25 of EDX for EAX=0x1
-
-// Helper macros for initialization
-#define FUNCTION_IMPL(NAME, EXT) \
- { .function=(void *)NAME, \
- .supportedSimdExt=EXT \
- }
-
-#define INIT_DISPATCH_PTR(OP) \
- do { \
- int i; \
- for (i = 0; i < sizeof(THVector_(OP ## _DISPATCHTABLE)) / sizeof(FunctionDescription); ++i) { \
- THVector_(OP ## _DISPATCHPTR) = THVector_(OP ## _DISPATCHTABLE)[i].function; \
- if (THVector_(OP ## _DISPATCHTABLE)[i].supportedSimdExt & hostSimdExts) { \
- break; \
- } \
- } \
- } while(0)
-
-
-typedef struct FunctionDescription
-{
- void *function;
- uint32_t supportedSimdExt;
-} FunctionDescription;
-
-
-enum SIMDExtensions
-{
-#if defined(__NEON__)
- SIMDExtension_NEON = 0x1,
-#elif defined(__PPC64__)
- SIMDExtension_VSX = 0x1,
-#else
- SIMDExtension_AVX2 = 0x1,
- SIMDExtension_AVX = 0x2,
- SIMDExtension_SSE = 0x4,
-#endif
- SIMDExtension_DEFAULT = 0x0
-};
-
-
-#if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
-
- #if defined(__NEON__)
-
-static inline uint32_t detectHostSIMDExtensions()
-{
- return SIMDExtension_NEON;
-}
-
- #else //ARM without NEON
-
-static inline uint32_t detectHostSIMDExtensions()
-{
- return SIMDExtension_DEFAULT;
-}
-
- #endif
-
-#elif defined(__PPC64__)
-
- #if defined(__VSX__)
-
-static inline uint32_t detectHostSIMDExtensions()
-{
- uint32_t hostSimdExts = SIMDExtension_DEFAULT;
- char *evar;
-
- evar = getenv("TH_NO_VSX");
- if (evar == NULL || strncmp(evar, "1", 2) != 0)
- hostSimdExts = SIMDExtension_VSX;
- return hostSimdExts;
-}
-
- #else //PPC64 without VSX
-
-static inline uint32_t detectHostSIMDExtensions()
-{
- return SIMDExtension_DEFAULT;
-}
-
- #endif
-
-#else // x86
-static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
-{
-#if defined(_MSC_VER)
- uint32_t cpuInfo[4];
- __cpuid(cpuInfo, *eax);
- *eax = cpuInfo[0];
- *ebx = cpuInfo[1];
- *ecx = cpuInfo[2];
- *edx = cpuInfo[3];
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
- uint32_t level = *eax;
- __get_cpuid (level, eax, ebx, ecx, edx);
-#else
- uint32_t a = *eax, b, c = *ecx, d;
- __asm volatile ( "cpuid\n\t"
- : "+a"(a), "=b"(b), "+c"(c), "=d"(d) );
- *eax = a;
- *ebx = b;
- *ecx = c;
- *edx = d;
-#endif
-}
-
-static inline uint32_t detectHostSIMDExtensions()
-{
- uint32_t eax, ebx, ecx, edx;
- uint32_t hostSimdExts = 0x0;
- int TH_NO_AVX = 1, TH_NO_AVX2 = 1, TH_NO_SSE = 1;
- char *evar;
-
- evar = getenv("TH_NO_AVX2");
- if (evar == NULL || strncmp(evar, "1", 2) != 0)
- TH_NO_AVX2 = 0;
-
- // Check for AVX2. Requires separate CPUID
- eax = 0x7;
- ecx = 0x0;
- cpuid(&eax, &ebx, &ecx, &edx);
- if ((ebx & CPUID_AVX2_BIT) && TH_NO_AVX2 == 0) {
- hostSimdExts |= SIMDExtension_AVX2;
- }
-
- // Detect and enable AVX and SSE
- eax = 0x1;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- evar = getenv("TH_NO_AVX");
- if (evar == NULL || strncmp(evar, "1", 2) != 0)
- TH_NO_AVX = 0;
- if (ecx & CPUID_AVX_BIT && TH_NO_AVX == 0) {
- hostSimdExts |= SIMDExtension_AVX;
- }
-
- evar = getenv("TH_NO_SSE");
- if (evar == NULL || strncmp(evar, "1", 2) != 0)
- TH_NO_SSE = 0;
- if (edx & CPUID_SSE_BIT && TH_NO_SSE == 0) {
- hostSimdExts |= SIMDExtension_SSE;
- }
-
- return hostSimdExts;
-}
-
-#endif // end SIMD extension detection code
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX.c b/contrib/lua-torch/torch7/lib/TH/vector/AVX.c
deleted file mode 100644
index 58c4e6d35..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/AVX.c
+++ /dev/null
@@ -1,274 +0,0 @@
-#if defined(USE_AVX) && defined(__AVX__)
-#ifndef _MSC_VER
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#endif
-
-#include "AVX.h"
-
-void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
- ptrdiff_t i;
- ptrdiff_t off;
- for (i=0; i<=((n)-8); i+=8) {
- _mm256_storeu_pd(y+i, _mm256_loadu_pd(x+i));
- _mm256_storeu_pd(y+i+4, _mm256_loadu_pd(x+i+4));
- }
- off = (n) - ((n)%8);
- for (i=0; i<((n)%8); i++) {
- y[off+i] = x[off+i];
- }
-}
-
-void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- ptrdiff_t off;
- __m256d YMM0 = _mm256_set_pd(c, c, c, c);
- for (i=0; i<=((n)-16); i+=16) {
- _mm256_storeu_pd((x)+i , YMM0);
- _mm256_storeu_pd((x)+i+4, YMM0);
- _mm256_storeu_pd((x)+i+8, YMM0);
- _mm256_storeu_pd((x)+i+12, YMM0);
- }
- off = (n) - ((n)%16);
- for (i=0; i<((n)%16); i++) {
- x[off+i] = c;
- }
-}
-
-void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_pd(x+i);
- YMM1 = _mm256_loadu_pd(x+i+4);
- YMM2 = _mm256_loadu_pd(y+i);
- YMM3 = _mm256_loadu_pd(y+i+4);
- YMM2 = _mm256_div_pd(YMM0, YMM2);
- YMM3 = _mm256_div_pd(YMM1, YMM3);
- _mm256_storeu_pd(z+i, YMM2);
- _mm256_storeu_pd(z+i+4, YMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] / y[i];
- }
-}
-
-void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM15 = _mm256_set_pd(c, c, c, c);
- __m256d YMM0, YMM1;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_pd(x+i);
- YMM1 = _mm256_loadu_pd(x+i+4);
- YMM0 = _mm256_div_pd(YMM0, YMM15);
- YMM1 = _mm256_div_pd(YMM1, YMM15);
- _mm256_storeu_pd(y+i, YMM0);
- _mm256_storeu_pd(y+i+4, YMM1);
- }
- for (; i<(n); i++) {
- y[i] = x[i] / c;
- }
-}
-
-void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_pd(x+i);
- YMM1 = _mm256_loadu_pd(x+i+4);
- YMM2 = _mm256_loadu_pd(y+i);
- YMM3 = _mm256_loadu_pd(y+i+4);
- YMM2 = _mm256_mul_pd(YMM0, YMM2);
- YMM3 = _mm256_mul_pd(YMM1, YMM3);
- _mm256_storeu_pd(z+i, YMM2);
- _mm256_storeu_pd(z+i+4, YMM3);
- }
- for (; i<n; i++) {
- z[i] = x[i] * y[i];
- }
-}
-
-void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM15 = _mm256_set_pd(c, c, c, c);
- __m256d YMM0, YMM1;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_pd(x+i);
- YMM1 = _mm256_loadu_pd(x+i+4);
- YMM0 = _mm256_mul_pd(YMM0, YMM15);
- YMM1 = _mm256_mul_pd(YMM1, YMM15);
- _mm256_storeu_pd(y+i, YMM0);
- _mm256_storeu_pd(y+i+4, YMM1);
- }
- for (; i<n; i++) {
- y[i] = x[i] * c;
- }
-}
-
-void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM15 = _mm256_set_pd(c, c, c, c);
- __m256d YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-4); i+=4) {
- YMM0 = _mm256_loadu_pd(y+i);
- YMM1 = _mm256_loadu_pd(x+i);
- YMM2 = _mm256_mul_pd(YMM0, YMM15);
- YMM3 = _mm256_add_pd(YMM1, YMM2);
- _mm256_storeu_pd(z+i, YMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] + y[i] * c;
- }
-}
-
-void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM15 = _mm256_set_pd(c, c, c, c);
- __m256d YMM0, YMM1;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_pd(x+i);
- YMM1 = _mm256_loadu_pd(x+i+4);
- YMM0 = _mm256_add_pd(YMM0, YMM15);
- YMM1 = _mm256_add_pd(YMM1, YMM15);
- _mm256_storeu_pd(y+i, YMM0);
- _mm256_storeu_pd(y+i+4, YMM1);
- }
- for (; i<(n); i++) {
- y[i] = x[i] + c;
- }
-}
-
-void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
- ptrdiff_t i;
- ptrdiff_t off;
- for (i=0; i<=((n)-16); i+=16) {
- _mm256_storeu_ps(y+i, _mm256_loadu_ps(x+i));
- _mm256_storeu_ps(y+i+8, _mm256_loadu_ps(x+i+8));
- }
- off = (n) - ((n)%16);
- for (i=0; i<((n)%16); i++) {
- y[off+i] = x[off+i];
- }
-}
-
-void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- ptrdiff_t off;
- __m256 YMM0 = _mm256_set_ps(c, c, c, c, c, c, c, c);
- for (i=0; i<=((n)-32); i+=32) {
- _mm256_storeu_ps((x)+i , YMM0);
- _mm256_storeu_ps((x)+i+8, YMM0);
- _mm256_storeu_ps((x)+i+16, YMM0);
- _mm256_storeu_ps((x)+i+24, YMM0);
- }
- off = (n) - ((n)%32);
- for (i=0; i<((n)%32); i++) {
- x[off+i] = c;
- }
-}
-
-void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-16); i+=16) {
- YMM0 = _mm256_loadu_ps(x+i);
- YMM1 = _mm256_loadu_ps(x+i+8);
- YMM2 = _mm256_loadu_ps(y+i);
- YMM3 = _mm256_loadu_ps(y+i+8);
- YMM2 = _mm256_div_ps(YMM0, YMM2);
- YMM3 = _mm256_div_ps(YMM1, YMM3);
- _mm256_storeu_ps(z+i, YMM2);
- _mm256_storeu_ps(z+i+8, YMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] / y[i];
- }
-}
-
-void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
- __m256 YMM0, YMM1;
- for (i=0; i<=((n)-16); i+=16) {
- YMM0 = _mm256_loadu_ps(x+i);
- YMM1 = _mm256_loadu_ps(x+i+8);
- YMM0 = _mm256_div_ps(YMM0, YMM15);
- YMM1 = _mm256_div_ps(YMM1, YMM15);
- _mm256_storeu_ps(y+i, YMM0);
- _mm256_storeu_ps(y+i+8, YMM1);
- }
- for (; i<(n); i++) {
- y[i] = x[i] / c;
- }
-}
-
-void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-16); i+=16) {
- YMM0 = _mm256_loadu_ps(x+i);
- YMM1 = _mm256_loadu_ps(x+i+8);
- YMM2 = _mm256_loadu_ps(y+i);
- YMM3 = _mm256_loadu_ps(y+i+8);
- YMM2 = _mm256_mul_ps(YMM0, YMM2);
- YMM3 = _mm256_mul_ps(YMM1, YMM3);
- _mm256_storeu_ps(z+i, YMM2);
- _mm256_storeu_ps(z+i+8, YMM3);
- }
- for (; i<n; i++) {
- z[i] = x[i] * y[i];
- }
-}
-
-void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
- __m256 YMM0, YMM1;
- for (i=0; i<=((n)-16); i+=16) {
- YMM0 = _mm256_loadu_ps(x+i);
- YMM1 = _mm256_loadu_ps(x+i+8);
- YMM0 = _mm256_mul_ps(YMM0, YMM15);
- YMM1 = _mm256_mul_ps(YMM1, YMM15);
- _mm256_storeu_ps(y+i, YMM0);
- _mm256_storeu_ps(y+i+8, YMM1);
- }
- for (; i<n; i++) {
- y[i] = x[i] * c;
- }
-}
-
-void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
- __m256 YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_ps(y+i);
- YMM1 = _mm256_loadu_ps(x+i);
- YMM2 = _mm256_mul_ps(YMM0, YMM15);
- YMM3 = _mm256_add_ps(YMM1, YMM2);
- _mm256_storeu_ps(z+i, YMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] + y[i] * c;
- }
-}
-
-void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
- __m256 YMM0, YMM1;
- for (i=0; i<=((n)-16); i+=16) {
- YMM0 = _mm256_loadu_ps(x+i);
- YMM1 = _mm256_loadu_ps(x+i+8);
- YMM0 = _mm256_add_ps(YMM0, YMM15);
- YMM1 = _mm256_add_ps(YMM1, YMM15);
- _mm256_storeu_ps(y+i, YMM0);
- _mm256_storeu_ps(y+i+8, YMM1);
- }
- for (; i<(n); i++) {
- y[i] = x[i] + c;
- }
-}
-
-#endif // defined(__AVX__)
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX.h b/contrib/lua-torch/torch7/lib/TH/vector/AVX.h
deleted file mode 100644
index bfaeaa6b0..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/AVX.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef TH_AVX_H
-#define TH_AVX_H
-
-#include <stddef.h>
-
-void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);
-void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);
-void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
-void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
-void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
-void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
-void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
-void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
-void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);
-void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);
-void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
-void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
-void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
-void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
-void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
-void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.c b/contrib/lua-torch/torch7/lib/TH/vector/AVX2.c
deleted file mode 100644
index 082a680ea..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.c
+++ /dev/null
@@ -1,47 +0,0 @@
-#if defined(__AVX2__)
-#ifndef _MSC_VER
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#endif
-#include "AVX2.h"
-
-void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256d YMM15 = _mm256_set_pd(c, c, c, c);
- __m256d YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-8); i+=8) {
- YMM0 = _mm256_loadu_pd(y+i);
- YMM1 = _mm256_loadu_pd(y+i+4);
- YMM2 = _mm256_loadu_pd(x+i);
- YMM3 = _mm256_loadu_pd(x+i+4);
- YMM2 = _mm256_fmadd_pd(YMM0, YMM15, YMM2);
- YMM3 = _mm256_fmadd_pd(YMM1, YMM15, YMM3);
- _mm256_storeu_pd(z+i, YMM2);
- _mm256_storeu_pd(z+i+4, YMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] + y[i] * c;
- }
-}
-
-void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
- __m256 YMM0, YMM1, YMM2, YMM3;
- for (i=0; i<=((n)-16); i+=16) {
- YMM0 = _mm256_loadu_ps(y+i);
- YMM1 = _mm256_loadu_ps(y+i+8);
- YMM2 = _mm256_loadu_ps(x+i);
- YMM3 = _mm256_loadu_ps(x+i+8);
- YMM2 = _mm256_fmadd_ps(YMM0, YMM15, YMM2);
- YMM3 = _mm256_fmadd_ps(YMM1, YMM15, YMM3);
- _mm256_storeu_ps(z+i, YMM2);
- _mm256_storeu_ps(z+i+8, YMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] + y[i] * c;
- }
-}
-
-#endif // defined(__AVX2__)
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.h b/contrib/lua-torch/torch7/lib/TH/vector/AVX2.h
deleted file mode 100644
index 85a9e93ee..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef TH_AVX2_H
-#define TH_AVX2_H
-
-#include <stddef.h>
-
-void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
-void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/NEON.c b/contrib/lua-torch/torch7/lib/TH/vector/NEON.c
deleted file mode 100644
index 7920fb13b..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/NEON.c
+++ /dev/null
@@ -1,105 +0,0 @@
-static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {
- long i = 0;
-
- for(; i < n-4; i += 4)
- {
- x[i] = c;
- x[i+1] = c;
- x[i+2] = c;
- x[i+3] = c;
- }
-
- for(; i < n; i++)
- x[i] = c;
-
-}
-
-static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) {
- long i = 0;
-
- for(; i < n-4; i += 4)
- {
- z[i] = x[i] * y[i];
- z[i+1] = x[i+1] * y[i+1];
- z[i+2] = x[i+2] * y[i+2];
- z[i+3] = x[i+3] * y[i+3];
- }
-
- for(; i < n; i++)
- z[i] = x[i] * y[i];
-}
-
-static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
- long i = 0;
-
- for(; i < n-4; i += 4)
- {
- y[i] = x[i] * c;
- y[i+1] = x[i+1] * c;
- y[i+2] = x[i+2] * c;
- y[i+3] = x[i+3] * c;
- }
-
- for(; i < n; i++)
- y[i] = x[i] * c;
-}
-
-static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
- long i = 0;
-
- for(;i < n-4; i += 4)
- {
- z[i] = x[i] + c * y[i];
- z[i+1] = x[i+1] + c * y[i+1];
- z[i+2] = x[i+2] + c * y[i+2];
- z[i+3] = x[i+3] + c * y[i+3];
- }
-
- for(; i < n; i++)
- z[i] = x[i] + c * y[i];
-}
-
-static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
- long i = 0;
-
- for(;i < n-4; i += 4)
- {
- y[i] = x[i] + c;
- y[i+1] = x[i+1] + c;
- y[i+2] = x[i+2] + c;
- y[i+3] = x[i+3] + c;
- }
-
- for(; i < n; i++)
- y[i] = x[i] + c;
-}
-
-static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {
- long i = 0;
-
- for(;i < n-4; i += 4)
- {
- z[i] = x[i] / y[i];
- z[i+1] = x[i+1] / y[i+1];
- z[i+2] = x[i+2] / y[i+2];
- z[i+3] = x[i+3] / y[i+3];
- }
-
- for(; i < n; i++)
- z[i] = x[i] / y[i];
-}
-
-static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {
- long i = 0;
-
- for(;i < n-4; i += 4)
- {
- y[i] = x[i] / c;
- y[i+1] = x[i+1] / c;
- y[i+2] = x[i+2] / c;
- y[i+3] = x[i+3] / c;
- }
-
- for(; i < n; i++)
- y[i] = x[i] / c;
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/SSE.c b/contrib/lua-torch/torch7/lib/TH/vector/SSE.c
deleted file mode 100644
index d026935ab..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/SSE.c
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef _MSC_VER
-#include <x86intrin.h>
-#else
-#include <intrin.h>
-#endif
-
-static void THDoubleVector_fill_SSE(double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- ptrdiff_t off;
- __m128d XMM0 = _mm_set1_pd(c);
- for (i=0; i<=((n)-8); i+=8) {
- _mm_storeu_pd((x)+i , XMM0);
- _mm_storeu_pd((x)+i+2, XMM0);
- _mm_storeu_pd((x)+i+4, XMM0);
- _mm_storeu_pd((x)+i+6, XMM0);
- }
- off = (n) - ((n)%8);
- for (i=0; i<((n)%8); i++) {
- x[off+i] = c;
- }
-}
-
-static void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128d XMM7 = _mm_set1_pd(c);
- __m128d XMM0, XMM2;
- for (i=0; i<=((n)-2); i+=2) {
- XMM0 = _mm_loadu_pd((x)+i);
- XMM2 = _mm_loadu_pd((y)+i);
- XMM2 = _mm_mul_pd(XMM2, XMM7);
- XMM2 = _mm_add_pd(XMM0, XMM2);
- _mm_storeu_pd((z)+i, XMM2);
- }
- for (; i<(n); i++) {
- z[i] = x[i] + c * y[i];
- }
-}
-
-static void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128d XMM7 = _mm_set1_pd(c);
- __m128d XMM0, XMM2;
- for (i=0; i<=((n)-4); i+=4) {
- XMM0 = _mm_loadu_pd((x)+i);
- XMM2 = _mm_loadu_pd((x)+i+2);
- XMM0 = _mm_add_pd(XMM0, XMM7);
- XMM2 = _mm_add_pd(XMM2, XMM7);
- _mm_storeu_pd((y)+i, XMM0);
- _mm_storeu_pd((y)+i+2, XMM2);
- }
- for (; i<(n); i++) {
- y[i] = x[i] + c;
- }
-}
-
-static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) {
- ptrdiff_t i;
- for (i=0; i<=((n)-8); i+=8) {
- __m128d XMM0 = _mm_loadu_pd((x)+i );
- __m128d XMM1 = _mm_loadu_pd((x)+i+2);
- __m128d XMM2 = _mm_loadu_pd((x)+i+4);
- __m128d XMM3 = _mm_loadu_pd((x)+i+6);
- __m128d XMM4 = _mm_loadu_pd((y)+i );
- __m128d XMM5 = _mm_loadu_pd((y)+i+2);
- __m128d XMM6 = _mm_loadu_pd((y)+i+4);
- __m128d XMM7 = _mm_loadu_pd((y)+i+6);
- XMM4 = _mm_mul_pd(XMM4, XMM0);
- XMM5 = _mm_mul_pd(XMM5, XMM1);
- XMM6 = _mm_mul_pd(XMM6, XMM2);
- XMM7 = _mm_mul_pd(XMM7, XMM3);
- _mm_storeu_pd((z)+i , XMM4);
- _mm_storeu_pd((z)+i+2, XMM5);
- _mm_storeu_pd((z)+i+4, XMM6);
- _mm_storeu_pd((z)+i+6, XMM7);
- }
- for (; i<(n); i++) {
- z[i] = x[i] * y[i];
- }
-}
-
-static void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128d XMM15 = _mm_set1_pd(c);
- for (i=0; i<=((n)-8); i+=8) {
- __m128d XMM0 = _mm_loadu_pd((x)+i );
- __m128d XMM1 = _mm_loadu_pd((x)+i+2);
- __m128d XMM2 = _mm_loadu_pd((x)+i+4);
- __m128d XMM3 = _mm_loadu_pd((x)+i+6);
- __m128d XMM4 = _mm_mul_pd(XMM15, XMM0);
- __m128d XMM5 = _mm_mul_pd(XMM15, XMM1);
- __m128d XMM6 = _mm_mul_pd(XMM15, XMM2);
- __m128d XMM7 = _mm_mul_pd(XMM15, XMM3);
- _mm_storeu_pd((y)+i , XMM4);
- _mm_storeu_pd((y)+i+2, XMM5);
- _mm_storeu_pd((y)+i+4, XMM6);
- _mm_storeu_pd((y)+i+6, XMM7);
- }
- for (; i<(n); i++) {
- y[i] = x[i] * c;
- }
-}
-
-static void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128d XMM0, XMM1, XMM2, XMM3;
- for (i=0; i<=((n)-4); i+=4) {
- XMM0 = _mm_loadu_pd(x+i);
- XMM1 = _mm_loadu_pd(x+i+2);
- XMM2 = _mm_loadu_pd(y+i);
- XMM3 = _mm_loadu_pd(y+i+2);
- XMM2 = _mm_div_pd(XMM0, XMM2);
- XMM3 = _mm_div_pd(XMM1, XMM3);
- _mm_storeu_pd(z+i, XMM2);
- _mm_storeu_pd(z+i+2, XMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] / y[i];
- }
-}
-
-static void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128d XMM7 = _mm_set1_pd(c);
- __m128d XMM0, XMM1;
- for (i=0; i<=((n)-4); i+=4) {
- XMM0 = _mm_loadu_pd(x+i);
- XMM1 = _mm_loadu_pd(x+i+2);
- XMM0 = _mm_div_pd(XMM0, XMM7);
- XMM1 = _mm_div_pd(XMM1, XMM7);
- _mm_storeu_pd(y+i, XMM0);
- _mm_storeu_pd(y+i+2, XMM1);
- }
- for (; i<(n); i++) {
- y[i] = x[i] / c;
- }
-}
-
-static void THFloatVector_fill_SSE(float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128 XMM0 = _mm_set_ps1(c);
- ptrdiff_t off;
- for (i=0; i<=((n)-16); i+=16) {
- _mm_storeu_ps((x)+i , XMM0);
- _mm_storeu_ps((x)+i+4, XMM0);
- _mm_storeu_ps((x)+i+8, XMM0);
- _mm_storeu_ps((x)+i+12, XMM0);
- }
- off = (n) - ((n)%16);
- for (i=0; i<((n)%16); i++) {
- x[off+i] = c;
- }
-}
-
-
-static void THFloatVector_cadd_SSE(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128 XMM7 = _mm_set_ps1(c);
- __m128 XMM0, XMM2;
- for (i=0; i<=((n)-4); i+=4) {
- XMM0 = _mm_loadu_ps((x)+i);
- XMM2 = _mm_loadu_ps((y)+i);
- XMM2 = _mm_mul_ps(XMM2, XMM7);
- XMM2 = _mm_add_ps(XMM0, XMM2);
- _mm_storeu_ps((z)+i, XMM2);
- }
- for (; i<(n); i++) {
- z[i] = x[i] + c * y[i];
- }
-}
-
-static void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128 XMM7 = _mm_set1_ps(c);
- __m128 XMM0, XMM2;
- for (i=0; i<=((n)-8); i+=8) {
- XMM0 = _mm_loadu_ps((x)+i);
- XMM2 = _mm_loadu_ps((x)+i+4);
- XMM0 = _mm_add_ps(XMM0, XMM7);
- XMM2 = _mm_add_ps(XMM2, XMM7);
- _mm_storeu_ps((y)+i, XMM0);
- _mm_storeu_ps((y)+i+4, XMM2);
- }
- for (; i<(n); i++) {
- y[i] = x[i] + c;
- }
-}
-
-static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) {
- ptrdiff_t i;
- for (i=0; i<=((n)-16); i+=16) {
- __m128 XMM0 = _mm_loadu_ps((x)+i );
- __m128 XMM1 = _mm_loadu_ps((x)+i+ 4);
- __m128 XMM2 = _mm_loadu_ps((x)+i+ 8);
- __m128 XMM3 = _mm_loadu_ps((x)+i+12);
- __m128 XMM4 = _mm_loadu_ps((y)+i );
- __m128 XMM5 = _mm_loadu_ps((y)+i+ 4);
- __m128 XMM6 = _mm_loadu_ps((y)+i+ 8);
- __m128 XMM7 = _mm_loadu_ps((y)+i+12);
- XMM4 = _mm_mul_ps(XMM4, XMM0);
- XMM5 = _mm_mul_ps(XMM5, XMM1);
- XMM6 = _mm_mul_ps(XMM6, XMM2);
- XMM7 = _mm_mul_ps(XMM7, XMM3);
- _mm_storeu_ps((z)+i , XMM4);
- _mm_storeu_ps((z)+i+ 4, XMM5);
- _mm_storeu_ps((z)+i+ 8, XMM6);
- _mm_storeu_ps((z)+i+12, XMM7);
- }
- for (; i<(n); i++) {
- z[i] = x[i] * y[i];
- }
-}
-
-static void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128 XMM15 = _mm_set_ps1(c);
- for (i=0; i<=((n)-16); i+=16) {
- __m128 XMM0 = _mm_loadu_ps((x)+i );
- __m128 XMM1 = _mm_loadu_ps((x)+i+ 4);
- __m128 XMM2 = _mm_loadu_ps((x)+i+ 8);
- __m128 XMM3 = _mm_loadu_ps((x)+i+12);
- __m128 XMM4 = _mm_mul_ps(XMM15, XMM0);
- __m128 XMM5 = _mm_mul_ps(XMM15, XMM1);
- __m128 XMM6 = _mm_mul_ps(XMM15, XMM2);
- __m128 XMM7 = _mm_mul_ps(XMM15, XMM3);
- _mm_storeu_ps((y)+i , XMM4);
- _mm_storeu_ps((y)+i+ 4, XMM5);
- _mm_storeu_ps((y)+i+ 8, XMM6);
- _mm_storeu_ps((y)+i+12, XMM7);
- }
- for (; i<(n); i++) {
- y[i] = x[i] * c;
- }
-}
-
-static void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128 XMM0, XMM1, XMM2, XMM3;
- for (i=0; i<=((n)-8); i+=8) {
- XMM0 = _mm_loadu_ps(x+i);
- XMM1 = _mm_loadu_ps(x+i+4);
- XMM2 = _mm_loadu_ps(y+i);
- XMM3 = _mm_loadu_ps(y+i+4);
- XMM2 = _mm_div_ps(XMM0, XMM2);
- XMM3 = _mm_div_ps(XMM1, XMM3);
- _mm_storeu_ps(z+i, XMM2);
- _mm_storeu_ps(z+i+4, XMM3);
- }
- for (; i<(n); i++) {
- z[i] = x[i] / y[i];
- }
-}
-
-static void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {
- ptrdiff_t i;
- __m128 XMM7 = _mm_set1_ps(c);
- __m128 XMM0, XMM1;
- for (i=0; i<=((n)-8); i+=8) {
- XMM0 = _mm_loadu_ps(x+i);
- XMM1 = _mm_loadu_ps(x+i+4);
- XMM0 = _mm_div_ps(XMM0, XMM7);
- XMM1 = _mm_div_ps(XMM1, XMM7);
- _mm_storeu_ps(y+i, XMM0);
- _mm_storeu_ps(y+i+4, XMM1);
- }
- for (; i<(n); i++) {
- y[i] = x[i] / c;
- }
-}
diff --git a/contrib/lua-torch/torch7/lib/TH/vector/VSX.c b/contrib/lua-torch/torch7/lib/TH/vector/VSX.c
deleted file mode 100644
index 9ff984ad7..000000000
--- a/contrib/lua-torch/torch7/lib/TH/vector/VSX.c
+++ /dev/null
@@ -1,2520 +0,0 @@
-#ifdef __PPC64__
-#include <altivec.h>
-#include <stddef.h>
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_fill_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_fill_VSX(double *x, const double c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- double val[2] = {c, c};
- vector double fp64vec2 = vec_xl(0, val);
-
- for (i = 0; i <= n-128; i += 128)
- {
- vec_xst(fp64vec2, 0, x+(i ));
- vec_xst(fp64vec2, 0, x+(i+2 ));
- vec_xst(fp64vec2, 0, x+(i+4 ));
- vec_xst(fp64vec2, 0, x+(i+6 ));
- vec_xst(fp64vec2, 0, x+(i+8 ));
- vec_xst(fp64vec2, 0, x+(i+10 ));
- vec_xst(fp64vec2, 0, x+(i+12 ));
- vec_xst(fp64vec2, 0, x+(i+14 ));
- vec_xst(fp64vec2, 0, x+(i+16 ));
- vec_xst(fp64vec2, 0, x+(i+18 ));
- vec_xst(fp64vec2, 0, x+(i+20 ));
- vec_xst(fp64vec2, 0, x+(i+22 ));
- vec_xst(fp64vec2, 0, x+(i+24 ));
- vec_xst(fp64vec2, 0, x+(i+26 ));
- vec_xst(fp64vec2, 0, x+(i+28 ));
- vec_xst(fp64vec2, 0, x+(i+30 ));
- vec_xst(fp64vec2, 0, x+(i+32 ));
- vec_xst(fp64vec2, 0, x+(i+34 ));
- vec_xst(fp64vec2, 0, x+(i+36 ));
- vec_xst(fp64vec2, 0, x+(i+38 ));
- vec_xst(fp64vec2, 0, x+(i+40 ));
- vec_xst(fp64vec2, 0, x+(i+42 ));
- vec_xst(fp64vec2, 0, x+(i+44 ));
- vec_xst(fp64vec2, 0, x+(i+46 ));
- vec_xst(fp64vec2, 0, x+(i+48 ));
- vec_xst(fp64vec2, 0, x+(i+50 ));
- vec_xst(fp64vec2, 0, x+(i+52 ));
- vec_xst(fp64vec2, 0, x+(i+54 ));
- vec_xst(fp64vec2, 0, x+(i+56 ));
- vec_xst(fp64vec2, 0, x+(i+58 ));
- vec_xst(fp64vec2, 0, x+(i+60 ));
- vec_xst(fp64vec2, 0, x+(i+62 ));
- vec_xst(fp64vec2, 0, x+(i+64 ));
- vec_xst(fp64vec2, 0, x+(i+66 ));
- vec_xst(fp64vec2, 0, x+(i+68 ));
- vec_xst(fp64vec2, 0, x+(i+70 ));
- vec_xst(fp64vec2, 0, x+(i+72 ));
- vec_xst(fp64vec2, 0, x+(i+74 ));
- vec_xst(fp64vec2, 0, x+(i+76 ));
- vec_xst(fp64vec2, 0, x+(i+78 ));
- vec_xst(fp64vec2, 0, x+(i+80 ));
- vec_xst(fp64vec2, 0, x+(i+82 ));
- vec_xst(fp64vec2, 0, x+(i+84 ));
- vec_xst(fp64vec2, 0, x+(i+86 ));
- vec_xst(fp64vec2, 0, x+(i+88 ));
- vec_xst(fp64vec2, 0, x+(i+90 ));
- vec_xst(fp64vec2, 0, x+(i+92 ));
- vec_xst(fp64vec2, 0, x+(i+94 ));
- vec_xst(fp64vec2, 0, x+(i+96 ));
- vec_xst(fp64vec2, 0, x+(i+98 ));
- vec_xst(fp64vec2, 0, x+(i+100));
- vec_xst(fp64vec2, 0, x+(i+102));
- vec_xst(fp64vec2, 0, x+(i+104));
- vec_xst(fp64vec2, 0, x+(i+106));
- vec_xst(fp64vec2, 0, x+(i+108));
- vec_xst(fp64vec2, 0, x+(i+110));
- vec_xst(fp64vec2, 0, x+(i+112));
- vec_xst(fp64vec2, 0, x+(i+114));
- vec_xst(fp64vec2, 0, x+(i+116));
- vec_xst(fp64vec2, 0, x+(i+118));
- vec_xst(fp64vec2, 0, x+(i+120));
- vec_xst(fp64vec2, 0, x+(i+122));
- vec_xst(fp64vec2, 0, x+(i+124));
- vec_xst(fp64vec2, 0, x+(i+126));
- }
- for (; i <= n-16; i += 16)
- {
- vec_xst(fp64vec2, 0, x+(i ));
- vec_xst(fp64vec2, 0, x+(i+2 ));
- vec_xst(fp64vec2, 0, x+(i+4 ));
- vec_xst(fp64vec2, 0, x+(i+6 ));
- vec_xst(fp64vec2, 0, x+(i+8 ));
- vec_xst(fp64vec2, 0, x+(i+10 ));
- vec_xst(fp64vec2, 0, x+(i+12 ));
- vec_xst(fp64vec2, 0, x+(i+14 ));
- }
- for (; i <= n-2; i += 2)
- vec_xst(fp64vec2, 0, x+(i ));
- for (; i < n; i++)
- x[i] = c;
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_cadds_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_cadd_VSX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- double val[2] = {c, c};
- vector double c_fp64vec2 = vec_xl(0, val);
-
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;
- vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;
- vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;
- vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;
-
-
- for (i = 0; i <= n-24; i += 24)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- y1_fp64vec2 = vec_xl(0, y+(i+2 ));
- y2_fp64vec2 = vec_xl(0, y+(i+4 ));
- y3_fp64vec2 = vec_xl(0, y+(i+6 ));
- y4_fp64vec2 = vec_xl(0, y+(i+8 ));
- y5_fp64vec2 = vec_xl(0, y+(i+10));
- y6_fp64vec2 = vec_xl(0, y+(i+12));
- y7_fp64vec2 = vec_xl(0, y+(i+14));
- y8_fp64vec2 = vec_xl(0, y+(i+16));
- y9_fp64vec2 = vec_xl(0, y+(i+18));
- y10_fp64vec2 = vec_xl(0, y+(i+20));
- y11_fp64vec2 = vec_xl(0, y+(i+22));
-
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
- x4_fp64vec2 = vec_xl(0, x+(i+8 ));
- x5_fp64vec2 = vec_xl(0, x+(i+10));
- x6_fp64vec2 = vec_xl(0, x+(i+12));
- x7_fp64vec2 = vec_xl(0, x+(i+14));
- x8_fp64vec2 = vec_xl(0, x+(i+16));
- x9_fp64vec2 = vec_xl(0, x+(i+18));
- x10_fp64vec2 = vec_xl(0, x+(i+20));
- x11_fp64vec2 = vec_xl(0, x+(i+22));
-
- y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2);
- y1_fp64vec2 = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2);
- y2_fp64vec2 = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2);
- y3_fp64vec2 = vec_madd(y3_fp64vec2, c_fp64vec2, x3_fp64vec2);
- y4_fp64vec2 = vec_madd(y4_fp64vec2, c_fp64vec2, x4_fp64vec2);
- y5_fp64vec2 = vec_madd(y5_fp64vec2, c_fp64vec2, x5_fp64vec2);
- y6_fp64vec2 = vec_madd(y6_fp64vec2, c_fp64vec2, x6_fp64vec2);
- y7_fp64vec2 = vec_madd(y7_fp64vec2, c_fp64vec2, x7_fp64vec2);
- y8_fp64vec2 = vec_madd(y8_fp64vec2, c_fp64vec2, x8_fp64vec2);
- y9_fp64vec2 = vec_madd(y9_fp64vec2, c_fp64vec2, x9_fp64vec2);
- y10_fp64vec2 = vec_madd(y10_fp64vec2, c_fp64vec2,x10_fp64vec2);
- y11_fp64vec2 = vec_madd(y11_fp64vec2, c_fp64vec2,x11_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, z+(i ));
- vec_xst(y1_fp64vec2, 0, z+(i+2 ));
- vec_xst(y2_fp64vec2, 0, z+(i+4 ));
- vec_xst(y3_fp64vec2, 0, z+(i+6 ));
- vec_xst(y4_fp64vec2, 0, z+(i+8 ));
- vec_xst(y5_fp64vec2, 0, z+(i+10));
- vec_xst(y6_fp64vec2, 0, z+(i+12));
- vec_xst(y7_fp64vec2, 0, z+(i+14));
- vec_xst(y8_fp64vec2, 0, z+(i+16));
- vec_xst(y9_fp64vec2, 0, z+(i+18));
- vec_xst(y10_fp64vec2, 0, z+(i+20));
- vec_xst(y11_fp64vec2, 0, z+(i+22));
- }
- for (; i <= n-8; i += 8)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- y1_fp64vec2 = vec_xl(0, y+(i+2 ));
- y2_fp64vec2 = vec_xl(0, y+(i+4 ));
- y3_fp64vec2 = vec_xl(0, y+(i+6 ));
-
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
-
- y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2);
- y1_fp64vec2 = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2);
- y2_fp64vec2 = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2);
- y3_fp64vec2 = vec_madd(y3_fp64vec2, c_fp64vec2, x3_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, z+(i ));
- vec_xst(y1_fp64vec2, 0, z+(i+2 ));
- vec_xst(y2_fp64vec2, 0, z+(i+4 ));
- vec_xst(y3_fp64vec2, 0, z+(i+6 ));
- }
- for (; i <= n-2; i += 2)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- x0_fp64vec2 = vec_xl(0, x+(i ));
- y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2);
- vec_xst(y0_fp64vec2, 0, z+(i ));
- }
- for (; i < n; i++)
- z[i] = x[i] + c* y[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_adds_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_adds_VSX(double *y, const double *x, const double c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- double val[2] = {c, c};
- vector double c_fp64vec2 = vec_xl(0, val);
-
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;
- vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;
- vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;
- vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;
-
-
- for (i = 0; i <= n-24; i += 24)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
- x4_fp64vec2 = vec_xl(0, x+(i+8 ));
- x5_fp64vec2 = vec_xl(0, x+(i+10));
- x6_fp64vec2 = vec_xl(0, x+(i+12));
- x7_fp64vec2 = vec_xl(0, x+(i+14));
- x8_fp64vec2 = vec_xl(0, x+(i+16));
- x9_fp64vec2 = vec_xl(0, x+(i+18));
- x10_fp64vec2 = vec_xl(0, x+(i+20));
- x11_fp64vec2 = vec_xl(0, x+(i+22));
-
- y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2);
- y1_fp64vec2 = vec_add(x1_fp64vec2, c_fp64vec2);
- y2_fp64vec2 = vec_add(x2_fp64vec2, c_fp64vec2);
- y3_fp64vec2 = vec_add(x3_fp64vec2, c_fp64vec2);
- y4_fp64vec2 = vec_add(x4_fp64vec2, c_fp64vec2);
- y5_fp64vec2 = vec_add(x5_fp64vec2, c_fp64vec2);
- y6_fp64vec2 = vec_add(x6_fp64vec2, c_fp64vec2);
- y7_fp64vec2 = vec_add(x7_fp64vec2, c_fp64vec2);
- y8_fp64vec2 = vec_add(x8_fp64vec2, c_fp64vec2);
- y9_fp64vec2 = vec_add(x9_fp64vec2, c_fp64vec2);
- y10_fp64vec2 = vec_add(x10_fp64vec2, c_fp64vec2);
- y11_fp64vec2 = vec_add(x11_fp64vec2, c_fp64vec2);
-
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
- vec_xst(y4_fp64vec2, 0, y+(i+8 ));
- vec_xst(y5_fp64vec2, 0, y+(i+10));
- vec_xst(y6_fp64vec2, 0, y+(i+12));
- vec_xst(y7_fp64vec2, 0, y+(i+14));
- vec_xst(y8_fp64vec2, 0, y+(i+16));
- vec_xst(y9_fp64vec2, 0, y+(i+18));
- vec_xst(y10_fp64vec2, 0, y+(i+20));
- vec_xst(y11_fp64vec2, 0, y+(i+22));
- }
- for (; i <= n-8; i += 8)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
-
- y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2);
- y1_fp64vec2 = vec_add(x1_fp64vec2, c_fp64vec2);
- y2_fp64vec2 = vec_add(x2_fp64vec2, c_fp64vec2);
- y3_fp64vec2 = vec_add(x3_fp64vec2, c_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
- }
- for (; i <= n-2; i += 2)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2);
- vec_xst(y0_fp64vec2, 0, y+(i ));
- }
- for (; i < n; i++)
- y[i] = x[i] +c;
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_cmul_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_cmul_VSX(double *z, const double *x, const double *y, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;
- vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;
- vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;
- vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;
-
-
- for (i = 0; i <= n-24; i += 24)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- y1_fp64vec2 = vec_xl(0, y+(i+2 ));
- y2_fp64vec2 = vec_xl(0, y+(i+4 ));
- y3_fp64vec2 = vec_xl(0, y+(i+6 ));
- y4_fp64vec2 = vec_xl(0, y+(i+8 ));
- y5_fp64vec2 = vec_xl(0, y+(i+10));
- y6_fp64vec2 = vec_xl(0, y+(i+12));
- y7_fp64vec2 = vec_xl(0, y+(i+14));
- y8_fp64vec2 = vec_xl(0, y+(i+16));
- y9_fp64vec2 = vec_xl(0, y+(i+18));
- y10_fp64vec2 = vec_xl(0, y+(i+20));
- y11_fp64vec2 = vec_xl(0, y+(i+22));
-
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
- x4_fp64vec2 = vec_xl(0, x+(i+8 ));
- x5_fp64vec2 = vec_xl(0, x+(i+10));
- x6_fp64vec2 = vec_xl(0, x+(i+12));
- x7_fp64vec2 = vec_xl(0, x+(i+14));
- x8_fp64vec2 = vec_xl(0, x+(i+16));
- x9_fp64vec2 = vec_xl(0, x+(i+18));
- x10_fp64vec2 = vec_xl(0, x+(i+20));
- x11_fp64vec2 = vec_xl(0, x+(i+22));
-
- y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2);
- y1_fp64vec2 = vec_mul(y1_fp64vec2, x1_fp64vec2);
- y2_fp64vec2 = vec_mul(y2_fp64vec2, x2_fp64vec2);
- y3_fp64vec2 = vec_mul(y3_fp64vec2, x3_fp64vec2);
- y4_fp64vec2 = vec_mul(y4_fp64vec2, x4_fp64vec2);
- y5_fp64vec2 = vec_mul(y5_fp64vec2, x5_fp64vec2);
- y6_fp64vec2 = vec_mul(y6_fp64vec2, x6_fp64vec2);
- y7_fp64vec2 = vec_mul(y7_fp64vec2, x7_fp64vec2);
- y8_fp64vec2 = vec_mul(y8_fp64vec2, x8_fp64vec2);
- y9_fp64vec2 = vec_mul(y9_fp64vec2, x9_fp64vec2);
- y10_fp64vec2 = vec_mul(y10_fp64vec2, x10_fp64vec2);
- y11_fp64vec2 = vec_mul(y11_fp64vec2, x11_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, z+(i ));
- vec_xst(y1_fp64vec2, 0, z+(i+2 ));
- vec_xst(y2_fp64vec2, 0, z+(i+4 ));
- vec_xst(y3_fp64vec2, 0, z+(i+6 ));
- vec_xst(y4_fp64vec2, 0, z+(i+8 ));
- vec_xst(y5_fp64vec2, 0, z+(i+10));
- vec_xst(y6_fp64vec2, 0, z+(i+12));
- vec_xst(y7_fp64vec2, 0, z+(i+14));
- vec_xst(y8_fp64vec2, 0, z+(i+16));
- vec_xst(y9_fp64vec2, 0, z+(i+18));
- vec_xst(y10_fp64vec2, 0, z+(i+20));
- vec_xst(y11_fp64vec2, 0, z+(i+22));
- }
- for (; i <= n-8; i += 8)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- y1_fp64vec2 = vec_xl(0, y+(i+2 ));
- y2_fp64vec2 = vec_xl(0, y+(i+4 ));
- y3_fp64vec2 = vec_xl(0, y+(i+6 ));
-
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
-
- y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2);
- y1_fp64vec2 = vec_mul(y1_fp64vec2, x1_fp64vec2);
- y2_fp64vec2 = vec_mul(y2_fp64vec2, x2_fp64vec2);
- y3_fp64vec2 = vec_mul(y3_fp64vec2, x3_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, z+(i ));
- vec_xst(y1_fp64vec2, 0, z+(i+2 ));
- vec_xst(y2_fp64vec2, 0, z+(i+4 ));
- vec_xst(y3_fp64vec2, 0, z+(i+6 ));
- }
- for (; i <= n-2; i += 2)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- x0_fp64vec2 = vec_xl(0, x+(i ));
- y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2);
- vec_xst(y0_fp64vec2, 0, z+(i ));
- }
- for (; i < n; i++)
- z[i] = x[i] * y[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_muls_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_muls_VSX(double *y, const double *x, const double c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- double val[2] = {c, c};
- vector double c_fp64vec2 = vec_xl(0, val);
-
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;
- vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;
- vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;
- vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;
-
-
- for (i = 0; i <= n-24; i += 24)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
- x4_fp64vec2 = vec_xl(0, x+(i+8 ));
- x5_fp64vec2 = vec_xl(0, x+(i+10));
- x6_fp64vec2 = vec_xl(0, x+(i+12));
- x7_fp64vec2 = vec_xl(0, x+(i+14));
- x8_fp64vec2 = vec_xl(0, x+(i+16));
- x9_fp64vec2 = vec_xl(0, x+(i+18));
- x10_fp64vec2 = vec_xl(0, x+(i+20));
- x11_fp64vec2 = vec_xl(0, x+(i+22));
-
- y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2);
- y1_fp64vec2 = vec_mul(x1_fp64vec2, c_fp64vec2);
- y2_fp64vec2 = vec_mul(x2_fp64vec2, c_fp64vec2);
- y3_fp64vec2 = vec_mul(x3_fp64vec2, c_fp64vec2);
- y4_fp64vec2 = vec_mul(x4_fp64vec2, c_fp64vec2);
- y5_fp64vec2 = vec_mul(x5_fp64vec2, c_fp64vec2);
- y6_fp64vec2 = vec_mul(x6_fp64vec2, c_fp64vec2);
- y7_fp64vec2 = vec_mul(x7_fp64vec2, c_fp64vec2);
- y8_fp64vec2 = vec_mul(x8_fp64vec2, c_fp64vec2);
- y9_fp64vec2 = vec_mul(x9_fp64vec2, c_fp64vec2);
- y10_fp64vec2 = vec_mul(x10_fp64vec2, c_fp64vec2);
- y11_fp64vec2 = vec_mul(x11_fp64vec2, c_fp64vec2);
-
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
- vec_xst(y4_fp64vec2, 0, y+(i+8 ));
- vec_xst(y5_fp64vec2, 0, y+(i+10));
- vec_xst(y6_fp64vec2, 0, y+(i+12));
- vec_xst(y7_fp64vec2, 0, y+(i+14));
- vec_xst(y8_fp64vec2, 0, y+(i+16));
- vec_xst(y9_fp64vec2, 0, y+(i+18));
- vec_xst(y10_fp64vec2, 0, y+(i+20));
- vec_xst(y11_fp64vec2, 0, y+(i+22));
- }
- for (; i <= n-8; i += 8)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
-
- y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2);
- y1_fp64vec2 = vec_mul(x1_fp64vec2, c_fp64vec2);
- y2_fp64vec2 = vec_mul(x2_fp64vec2, c_fp64vec2);
- y3_fp64vec2 = vec_mul(x3_fp64vec2, c_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
- }
- for (; i <= n-2; i += 2)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2);
- vec_xst(y0_fp64vec2, 0, y+(i ));
- }
- for (; i < n; i++)
- y[i] = c * x[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_cdiv_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_cdiv_VSX(double *z, const double *x, const double *y, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;
- vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;
- vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;
- vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;
-
-
- for (i = 0; i <= n-24; i += 24)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- y1_fp64vec2 = vec_xl(0, y+(i+2 ));
- y2_fp64vec2 = vec_xl(0, y+(i+4 ));
- y3_fp64vec2 = vec_xl(0, y+(i+6 ));
- y4_fp64vec2 = vec_xl(0, y+(i+8 ));
- y5_fp64vec2 = vec_xl(0, y+(i+10));
- y6_fp64vec2 = vec_xl(0, y+(i+12));
- y7_fp64vec2 = vec_xl(0, y+(i+14));
- y8_fp64vec2 = vec_xl(0, y+(i+16));
- y9_fp64vec2 = vec_xl(0, y+(i+18));
- y10_fp64vec2 = vec_xl(0, y+(i+20));
- y11_fp64vec2 = vec_xl(0, y+(i+22));
-
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
- x4_fp64vec2 = vec_xl(0, x+(i+8 ));
- x5_fp64vec2 = vec_xl(0, x+(i+10));
- x6_fp64vec2 = vec_xl(0, x+(i+12));
- x7_fp64vec2 = vec_xl(0, x+(i+14));
- x8_fp64vec2 = vec_xl(0, x+(i+16));
- x9_fp64vec2 = vec_xl(0, x+(i+18));
- x10_fp64vec2 = vec_xl(0, x+(i+20));
- x11_fp64vec2 = vec_xl(0, x+(i+22));
-
- y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2);
- y1_fp64vec2 = vec_div(x1_fp64vec2, y1_fp64vec2);
- y2_fp64vec2 = vec_div(x2_fp64vec2, y2_fp64vec2);
- y3_fp64vec2 = vec_div(x3_fp64vec2, y3_fp64vec2);
- y4_fp64vec2 = vec_div(x4_fp64vec2, y4_fp64vec2);
- y5_fp64vec2 = vec_div(x5_fp64vec2, y5_fp64vec2);
- y6_fp64vec2 = vec_div(x6_fp64vec2, y6_fp64vec2);
- y7_fp64vec2 = vec_div(x7_fp64vec2, y7_fp64vec2);
- y8_fp64vec2 = vec_div(x8_fp64vec2, y8_fp64vec2);
- y9_fp64vec2 = vec_div(x9_fp64vec2, y9_fp64vec2);
- y10_fp64vec2 = vec_div(x10_fp64vec2, y10_fp64vec2);
- y11_fp64vec2 = vec_div(x11_fp64vec2, y11_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, z+(i ));
- vec_xst(y1_fp64vec2, 0, z+(i+2 ));
- vec_xst(y2_fp64vec2, 0, z+(i+4 ));
- vec_xst(y3_fp64vec2, 0, z+(i+6 ));
- vec_xst(y4_fp64vec2, 0, z+(i+8 ));
- vec_xst(y5_fp64vec2, 0, z+(i+10));
- vec_xst(y6_fp64vec2, 0, z+(i+12));
- vec_xst(y7_fp64vec2, 0, z+(i+14));
- vec_xst(y8_fp64vec2, 0, z+(i+16));
- vec_xst(y9_fp64vec2, 0, z+(i+18));
- vec_xst(y10_fp64vec2, 0, z+(i+20));
- vec_xst(y11_fp64vec2, 0, z+(i+22));
- }
- for (; i <= n-8; i += 8)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- y1_fp64vec2 = vec_xl(0, y+(i+2 ));
- y2_fp64vec2 = vec_xl(0, y+(i+4 ));
- y3_fp64vec2 = vec_xl(0, y+(i+6 ));
-
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
-
- y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2);
- y1_fp64vec2 = vec_div(x1_fp64vec2, y1_fp64vec2);
- y2_fp64vec2 = vec_div(x2_fp64vec2, y2_fp64vec2);
- y3_fp64vec2 = vec_div(x3_fp64vec2, y3_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, z+(i ));
- vec_xst(y1_fp64vec2, 0, z+(i+2 ));
- vec_xst(y2_fp64vec2, 0, z+(i+4 ));
- vec_xst(y3_fp64vec2, 0, z+(i+6 ));
- }
- for (; i <= n-2; i += 2)
- {
- y0_fp64vec2 = vec_xl(0, y+(i ));
- x0_fp64vec2 = vec_xl(0, x+(i ));
- y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2);
- vec_xst(y0_fp64vec2, 0, z+(i ));
- }
- for (; i < n; i++)
- z[i] = x[i] / y[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THDoubleVector_divs_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THDoubleVector_divs_VSX(double *y, const double *x, const double c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- double val[2] = {c, c};
- vector double c_fp64vec2 = vec_xl(0, val);
-
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;
- vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;
- vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;
- vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;
-
-
- for (i = 0; i <= n-24; i += 24)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
- x4_fp64vec2 = vec_xl(0, x+(i+8 ));
- x5_fp64vec2 = vec_xl(0, x+(i+10));
- x6_fp64vec2 = vec_xl(0, x+(i+12));
- x7_fp64vec2 = vec_xl(0, x+(i+14));
- x8_fp64vec2 = vec_xl(0, x+(i+16));
- x9_fp64vec2 = vec_xl(0, x+(i+18));
- x10_fp64vec2 = vec_xl(0, x+(i+20));
- x11_fp64vec2 = vec_xl(0, x+(i+22));
-
- y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2);
- y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2);
- y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2);
- y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2);
- y4_fp64vec2 = vec_div(x4_fp64vec2, c_fp64vec2);
- y5_fp64vec2 = vec_div(x5_fp64vec2, c_fp64vec2);
- y6_fp64vec2 = vec_div(x6_fp64vec2, c_fp64vec2);
- y7_fp64vec2 = vec_div(x7_fp64vec2, c_fp64vec2);
- y8_fp64vec2 = vec_div(x8_fp64vec2, c_fp64vec2);
- y9_fp64vec2 = vec_div(x9_fp64vec2, c_fp64vec2);
- y10_fp64vec2 = vec_div(x10_fp64vec2, c_fp64vec2);
- y11_fp64vec2 = vec_div(x11_fp64vec2, c_fp64vec2);
-
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
- vec_xst(y4_fp64vec2, 0, y+(i+8 ));
- vec_xst(y5_fp64vec2, 0, y+(i+10));
- vec_xst(y6_fp64vec2, 0, y+(i+12));
- vec_xst(y7_fp64vec2, 0, y+(i+14));
- vec_xst(y8_fp64vec2, 0, y+(i+16));
- vec_xst(y9_fp64vec2, 0, y+(i+18));
- vec_xst(y10_fp64vec2, 0, y+(i+20));
- vec_xst(y11_fp64vec2, 0, y+(i+22));
- }
- for (; i <= n-8; i += 8)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- x1_fp64vec2 = vec_xl(0, x+(i+2 ));
- x2_fp64vec2 = vec_xl(0, x+(i+4 ));
- x3_fp64vec2 = vec_xl(0, x+(i+6 ));
-
- y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2);
- y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2);
- y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2);
- y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2);
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
-
- vec_xst(y0_fp64vec2, 0, y+(i ));
- vec_xst(y1_fp64vec2, 0, y+(i+2 ));
- vec_xst(y2_fp64vec2, 0, y+(i+4 ));
- vec_xst(y3_fp64vec2, 0, y+(i+6 ));
- }
- for (; i <= n-2; i += 2)
- {
- x0_fp64vec2 = vec_xl(0, x+(i ));
- y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2);
- vec_xst(y0_fp64vec2, 0, y+(i ));
- }
- for (; i < n; i++)
- y[i] = x[i] / c;
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_fill_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_fill_VSX(float *x, const float c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- float val[4] = {c, c, c, c};
- vector float fp32vec4 = vec_xl(0, val);
-
- for (i = 0; i <= n-256; i += 256)
- {
- vec_xst(fp32vec4, 0, x+(i ));
- vec_xst(fp32vec4, 0, x+(i+4 ));
- vec_xst(fp32vec4, 0, x+(i+8 ));
- vec_xst(fp32vec4, 0, x+(i+12 ));
- vec_xst(fp32vec4, 0, x+(i+16 ));
- vec_xst(fp32vec4, 0, x+(i+20 ));
- vec_xst(fp32vec4, 0, x+(i+24 ));
- vec_xst(fp32vec4, 0, x+(i+28 ));
- vec_xst(fp32vec4, 0, x+(i+32 ));
- vec_xst(fp32vec4, 0, x+(i+36 ));
- vec_xst(fp32vec4, 0, x+(i+40 ));
- vec_xst(fp32vec4, 0, x+(i+44 ));
- vec_xst(fp32vec4, 0, x+(i+48 ));
- vec_xst(fp32vec4, 0, x+(i+52 ));
- vec_xst(fp32vec4, 0, x+(i+56 ));
- vec_xst(fp32vec4, 0, x+(i+60 ));
- vec_xst(fp32vec4, 0, x+(i+64 ));
- vec_xst(fp32vec4, 0, x+(i+68 ));
- vec_xst(fp32vec4, 0, x+(i+72 ));
- vec_xst(fp32vec4, 0, x+(i+76 ));
- vec_xst(fp32vec4, 0, x+(i+80 ));
- vec_xst(fp32vec4, 0, x+(i+84 ));
- vec_xst(fp32vec4, 0, x+(i+88 ));
- vec_xst(fp32vec4, 0, x+(i+92 ));
- vec_xst(fp32vec4, 0, x+(i+96 ));
- vec_xst(fp32vec4, 0, x+(i+100));
- vec_xst(fp32vec4, 0, x+(i+104));
- vec_xst(fp32vec4, 0, x+(i+108));
- vec_xst(fp32vec4, 0, x+(i+112));
- vec_xst(fp32vec4, 0, x+(i+116));
- vec_xst(fp32vec4, 0, x+(i+120));
- vec_xst(fp32vec4, 0, x+(i+124));
- vec_xst(fp32vec4, 0, x+(i+128));
- vec_xst(fp32vec4, 0, x+(i+132));
- vec_xst(fp32vec4, 0, x+(i+136));
- vec_xst(fp32vec4, 0, x+(i+140));
- vec_xst(fp32vec4, 0, x+(i+144));
- vec_xst(fp32vec4, 0, x+(i+148));
- vec_xst(fp32vec4, 0, x+(i+152));
- vec_xst(fp32vec4, 0, x+(i+156));
- vec_xst(fp32vec4, 0, x+(i+160));
- vec_xst(fp32vec4, 0, x+(i+164));
- vec_xst(fp32vec4, 0, x+(i+168));
- vec_xst(fp32vec4, 0, x+(i+172));
- vec_xst(fp32vec4, 0, x+(i+176));
- vec_xst(fp32vec4, 0, x+(i+180));
- vec_xst(fp32vec4, 0, x+(i+184));
- vec_xst(fp32vec4, 0, x+(i+188));
- vec_xst(fp32vec4, 0, x+(i+192));
- vec_xst(fp32vec4, 0, x+(i+196));
- vec_xst(fp32vec4, 0, x+(i+200));
- vec_xst(fp32vec4, 0, x+(i+204));
- vec_xst(fp32vec4, 0, x+(i+208));
- vec_xst(fp32vec4, 0, x+(i+212));
- vec_xst(fp32vec4, 0, x+(i+216));
- vec_xst(fp32vec4, 0, x+(i+220));
- vec_xst(fp32vec4, 0, x+(i+224));
- vec_xst(fp32vec4, 0, x+(i+228));
- vec_xst(fp32vec4, 0, x+(i+232));
- vec_xst(fp32vec4, 0, x+(i+236));
- vec_xst(fp32vec4, 0, x+(i+240));
- vec_xst(fp32vec4, 0, x+(i+244));
- vec_xst(fp32vec4, 0, x+(i+248));
- vec_xst(fp32vec4, 0, x+(i+252));
- }
- for (; i <= n-32; i += 32)
- {
- vec_xst(fp32vec4, 0, x+(i ));
- vec_xst(fp32vec4, 0, x+(i+4 ));
- vec_xst(fp32vec4, 0, x+(i+8 ));
- vec_xst(fp32vec4, 0, x+(i+12 ));
- vec_xst(fp32vec4, 0, x+(i+16 ));
- vec_xst(fp32vec4, 0, x+(i+20 ));
- vec_xst(fp32vec4, 0, x+(i+24 ));
- vec_xst(fp32vec4, 0, x+(i+28 ));
- }
- for (; i <= n-4; i += 4)
- vec_xst(fp32vec4, 0, x+(i ));
- for (; i < n; i++)
- x[i] = c;
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_cadd_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_cadd_VSX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- float val[4] = {c, c, c, c};
- vector float c_fp32vec4 = vec_xl(0, val);
-
- vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;
- vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;
- vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;
- vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;
-
-
- for (i = 0; i <= n-48; i += 48)
- {
- y0_fp32vec4 = vec_xl(0, y+(i ));
- y1_fp32vec4 = vec_xl(0, y+(i+4 ));
- y2_fp32vec4 = vec_xl(0, y+(i+8 ));
- y3_fp32vec4 = vec_xl(0, y+(i+12));
- y4_fp32vec4 = vec_xl(0, y+(i+16 ));
- y5_fp32vec4 = vec_xl(0, y+(i+20));
- y6_fp32vec4 = vec_xl(0, y+(i+24));
- y7_fp32vec4 = vec_xl(0, y+(i+28));
- y8_fp32vec4 = vec_xl(0, y+(i+32));
- y9_fp32vec4 = vec_xl(0, y+(i+36));
- y10_fp32vec4 = vec_xl(0, y+(i+40));
- y11_fp32vec4 = vec_xl(0, y+(i+44));
-
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12 ));
- x4_fp32vec4 = vec_xl(0, x+(i+16 ));
- x5_fp32vec4 = vec_xl(0, x+(i+20));
- x6_fp32vec4 = vec_xl(0, x+(i+24));
- x7_fp32vec4 = vec_xl(0, x+(i+28));
- x8_fp32vec4 = vec_xl(0, x+(i+32));
- x9_fp32vec4 = vec_xl(0, x+(i+36));
- x10_fp32vec4 = vec_xl(0, x+(i+40));
- x11_fp32vec4 = vec_xl(0, x+(i+44));
-
- y0_fp32vec4 = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4);
- y1_fp32vec4 = vec_madd(y1_fp32vec4, c_fp32vec4, x1_fp32vec4);
- y2_fp32vec4 = vec_madd(y2_fp32vec4, c_fp32vec4, x2_fp32vec4);
- y3_fp32vec4 = vec_madd(y3_fp32vec4, c_fp32vec4, x3_fp32vec4);
- y4_fp32vec4 = vec_madd(y4_fp32vec4, c_fp32vec4, x4_fp32vec4);
- y5_fp32vec4 = vec_madd(y5_fp32vec4, c_fp32vec4, x5_fp32vec4);
- y6_fp32vec4 = vec_madd(y6_fp32vec4, c_fp32vec4, x6_fp32vec4);
- y7_fp32vec4 = vec_madd(y7_fp32vec4, c_fp32vec4, x7_fp32vec4);
- y8_fp32vec4 = vec_madd(y8_fp32vec4, c_fp32vec4, x8_fp32vec4);
- y9_fp32vec4 = vec_madd(y9_fp32vec4, c_fp32vec4, x9_fp32vec4);
- y10_fp32vec4 = vec_madd(y10_fp32vec4, c_fp32vec4, x10_fp32vec4);
- y11_fp32vec4 = vec_madd(y11_fp32vec4, c_fp32vec4, x11_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, z+(i ));
- vec_xst(y1_fp32vec4, 0, z+(i+4 ));
- vec_xst(y2_fp32vec4, 0, z+(i+8 ));
- vec_xst(y3_fp32vec4, 0, z+(i+12 ));
- vec_xst(y4_fp32vec4, 0, z+(i+16 ));
- vec_xst(y5_fp32vec4, 0, z+(i+20));
- vec_xst(y6_fp32vec4, 0, z+(i+24));
- vec_xst(y7_fp32vec4, 0, z+(i+28));
- vec_xst(y8_fp32vec4, 0, z+(i+32));
- vec_xst(y9_fp32vec4, 0, z+(i+36));
- vec_xst(y10_fp32vec4, 0, z+(i+40));
- vec_xst(y11_fp32vec4, 0, z+(i+44));
- }
- for (; i <= n-16; i += 16)
- {
- y0_fp32vec4 = vec_xl(0, y+(i ));
- y1_fp32vec4 = vec_xl(0, y+(i+4 ));
- y2_fp32vec4 = vec_xl(0, y+(i+8 ));
- y3_fp32vec4 = vec_xl(0, y+(i+12 ));
-
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12 ));
-
- y0_fp32vec4 = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4);
- y1_fp32vec4 = vec_madd(y1_fp32vec4, c_fp32vec4, x1_fp32vec4);
- y2_fp32vec4 = vec_madd(y2_fp32vec4, c_fp32vec4, x2_fp32vec4);
- y3_fp32vec4 = vec_madd(y3_fp32vec4, c_fp32vec4, x3_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, z+(i ));
- vec_xst(y1_fp32vec4, 0, z+(i+4 ));
- vec_xst(y2_fp32vec4, 0, z+(i+8 ));
- vec_xst(y3_fp32vec4, 0, z+(i+12 ));
- }
- for (; i <= n-4; i += 4)
- {
- y0_fp32vec4 = vec_xl(0, y+(i ));
- x0_fp32vec4 = vec_xl(0, x+(i ));
- y0_fp32vec4 = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4);
- vec_xst(y0_fp32vec4, 0, z+(i ));
- }
- for (; i < n; i++)
- z[i] = x[i] + c* y[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_adds_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_adds_VSX(float *y, const float *x, const float c, const ptrdiff_t n)
-{
- ptrdiff_t i;
- float val[4] = {c, c, c, c};
- vector float c_fp32vec4 = vec_xl(0, val);
-
- vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;
- vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;
- vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;
- vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;
-
-
- for (i = 0; i <= n-48; i += 48)
- {
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12));
- x4_fp32vec4 = vec_xl(0, x+(i+16));
- x5_fp32vec4 = vec_xl(0, x+(i+20));
- x6_fp32vec4 = vec_xl(0, x+(i+24));
- x7_fp32vec4 = vec_xl(0, x+(i+28));
- x8_fp32vec4 = vec_xl(0, x+(i+32));
- x9_fp32vec4 = vec_xl(0, x+(i+36));
- x10_fp32vec4 = vec_xl(0, x+(i+40));
- x11_fp32vec4 = vec_xl(0, x+(i+44));
-
- y0_fp32vec4 = vec_add(x0_fp32vec4, c_fp32vec4);
- y1_fp32vec4 = vec_add(x1_fp32vec4, c_fp32vec4);
- y2_fp32vec4 = vec_add(x2_fp32vec4, c_fp32vec4);
- y3_fp32vec4 = vec_add(x3_fp32vec4, c_fp32vec4);
- y4_fp32vec4 = vec_add(x4_fp32vec4, c_fp32vec4);
- y5_fp32vec4 = vec_add(x5_fp32vec4, c_fp32vec4);
- y6_fp32vec4 = vec_add(x6_fp32vec4, c_fp32vec4);
- y7_fp32vec4 = vec_add(x7_fp32vec4, c_fp32vec4);
- y8_fp32vec4 = vec_add(x8_fp32vec4, c_fp32vec4);
- y9_fp32vec4 = vec_add(x9_fp32vec4, c_fp32vec4);
- y10_fp32vec4 = vec_add(x10_fp32vec4, c_fp32vec4);
- y11_fp32vec4 = vec_add(x11_fp32vec4, c_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, y+(i ));
- vec_xst(y1_fp32vec4, 0, y+(i+4 ));
- vec_xst(y2_fp32vec4, 0, y+(i+8 ));
- vec_xst(y3_fp32vec4, 0, y+(i+12));
- vec_xst(y4_fp32vec4, 0, y+(i+16));
- vec_xst(y5_fp32vec4, 0, y+(i+20));
- vec_xst(y6_fp32vec4, 0, y+(i+24));
- vec_xst(y7_fp32vec4, 0, y+(i+28));
- vec_xst(y8_fp32vec4, 0, y+(i+32));
- vec_xst(y9_fp32vec4, 0, y+(i+36));
- vec_xst(y10_fp32vec4, 0, y+(i+40));
- vec_xst(y11_fp32vec4, 0, y+(i+44));
- }
- for (; i <= n-16; i += 16)
- {
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12));
-
- y0_fp32vec4 = vec_add(x0_fp32vec4, c_fp32vec4);
- y1_fp32vec4 = vec_add(x1_fp32vec4, c_fp32vec4);
- y2_fp32vec4 = vec_add(x2_fp32vec4, c_fp32vec4);
- y3_fp32vec4 = vec_add(x3_fp32vec4, c_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, y+(i ));
- vec_xst(y1_fp32vec4, 0, y+(i+4 ));
- vec_xst(y2_fp32vec4, 0, y+(i+8 ));
- vec_xst(y3_fp32vec4, 0, y+(i+12));
- }
- for (; i <= n-4; i += 4)
- {
- x0_fp32vec4 = vec_xl(0, x+(i ));
- y0_fp32vec4 = vec_add(x0_fp32vec4, c_fp32vec4);
- vec_xst(y0_fp32vec4, 0, y+(i ));
- }
- for (; i < n; i++)
- y[i] = c + x[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_cmul_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_cmul_VSX(float *z, const float *y, const float *x, const ptrdiff_t n)
-{
- ptrdiff_t i;
-
- vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;
- vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;
- vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;
- vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;
-
-
- for (i = 0; i <= n-48; i += 48)
- {
- y0_fp32vec4 = vec_xl(0, y+(i ));
- y1_fp32vec4 = vec_xl(0, y+(i+4 ));
- y2_fp32vec4 = vec_xl(0, y+(i+8 ));
- y3_fp32vec4 = vec_xl(0, y+(i+12 ));
- y4_fp32vec4 = vec_xl(0, y+(i+16 ));
- y5_fp32vec4 = vec_xl(0, y+(i+20));
- y6_fp32vec4 = vec_xl(0, y+(i+24));
- y7_fp32vec4 = vec_xl(0, y+(i+28));
- y8_fp32vec4 = vec_xl(0, y+(i+32));
- y9_fp32vec4 = vec_xl(0, y+(i+36));
- y10_fp32vec4 = vec_xl(0, y+(i+40));
- y11_fp32vec4 = vec_xl(0, y+(i+44));
-
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12 ));
- x4_fp32vec4 = vec_xl(0, x+(i+16 ));
- x5_fp32vec4 = vec_xl(0, x+(i+20));
- x6_fp32vec4 = vec_xl(0, x+(i+24));
- x7_fp32vec4 = vec_xl(0, x+(i+28));
- x8_fp32vec4 = vec_xl(0, x+(i+32));
- x9_fp32vec4 = vec_xl(0, x+(i+36));
- x10_fp32vec4 = vec_xl(0, x+(i+40));
- x11_fp32vec4 = vec_xl(0, x+(i+44));
-
- y0_fp32vec4 = vec_mul(y0_fp32vec4, x0_fp32vec4);
- y1_fp32vec4 = vec_mul(y1_fp32vec4, x1_fp32vec4);
- y2_fp32vec4 = vec_mul(y2_fp32vec4, x2_fp32vec4);
- y3_fp32vec4 = vec_mul(y3_fp32vec4, x3_fp32vec4);
- y4_fp32vec4 = vec_mul(y4_fp32vec4, x4_fp32vec4);
- y5_fp32vec4 = vec_mul(y5_fp32vec4, x5_fp32vec4);
- y6_fp32vec4 = vec_mul(y6_fp32vec4, x6_fp32vec4);
- y7_fp32vec4 = vec_mul(y7_fp32vec4, x7_fp32vec4);
- y8_fp32vec4 = vec_mul(y8_fp32vec4, x8_fp32vec4);
- y9_fp32vec4 = vec_mul(y9_fp32vec4, x9_fp32vec4);
- y10_fp32vec4 = vec_mul(y10_fp32vec4, x10_fp32vec4);
- y11_fp32vec4 = vec_mul(y11_fp32vec4, x11_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, z+(i ));
- vec_xst(y1_fp32vec4, 0, z+(i+4 ));
- vec_xst(y2_fp32vec4, 0, z+(i+8 ));
- vec_xst(y3_fp32vec4, 0, z+(i+12 ));
- vec_xst(y4_fp32vec4, 0, z+(i+16 ));
- vec_xst(y5_fp32vec4, 0, z+(i+20));
- vec_xst(y6_fp32vec4, 0, z+(i+24));
- vec_xst(y7_fp32vec4, 0, z+(i+28));
- vec_xst(y8_fp32vec4, 0, z+(i+32));
- vec_xst(y9_fp32vec4, 0, z+(i+36));
- vec_xst(y10_fp32vec4, 0, z+(i+40));
- vec_xst(y11_fp32vec4, 0, z+(i+44));
- }
- for (; i <= n-16; i += 16)
- {
- y0_fp32vec4 = vec_xl(0, y+(i ));
- y1_fp32vec4 = vec_xl(0, y+(i+4 ));
- y2_fp32vec4 = vec_xl(0, y+(i+8 ));
- y3_fp32vec4 = vec_xl(0, y+(i+12 ));
-
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12 ));
-
- y0_fp32vec4 = vec_mul(y0_fp32vec4, x0_fp32vec4);
- y1_fp32vec4 = vec_mul(y1_fp32vec4, x1_fp32vec4);
- y2_fp32vec4 = vec_mul(y2_fp32vec4, x2_fp32vec4);
- y3_fp32vec4 = vec_mul(y3_fp32vec4, x3_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, z+(i ));
- vec_xst(y1_fp32vec4, 0, z+(i+4 ));
- vec_xst(y2_fp32vec4, 0, z+(i+8 ));
- vec_xst(y3_fp32vec4, 0, z+(i+12 ));
- }
- for (; i <= n-4; i += 4)
- {
- y0_fp32vec4 = vec_xl(0, y+(i ));
- x0_fp32vec4 = vec_xl(0, x+(i ));
- y0_fp32vec4 = vec_mul(y0_fp32vec4, x0_fp32vec4);
- vec_xst(y0_fp32vec4, 0, z+(i ));
- }
- for (; i < n; i++)
- z[i] = y[i] * x[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_muls_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_muls_VSX(float *y, const float *x, const float c, const ptrdiff_t n)
-{
- ptrdiff_t i;
- float val[4] = {c, c, c, c};
- vector float c_fp32vec4 = vec_xl(0, val);
-
- vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;
- vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;
- vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;
- vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;
-
-
- for (i = 0; i <= n-48; i += 48)
- {
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12));
- x4_fp32vec4 = vec_xl(0, x+(i+16));
- x5_fp32vec4 = vec_xl(0, x+(i+20));
- x6_fp32vec4 = vec_xl(0, x+(i+24));
- x7_fp32vec4 = vec_xl(0, x+(i+28));
- x8_fp32vec4 = vec_xl(0, x+(i+32));
- x9_fp32vec4 = vec_xl(0, x+(i+36));
- x10_fp32vec4 = vec_xl(0, x+(i+40));
- x11_fp32vec4 = vec_xl(0, x+(i+44));
-
- y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4);
- y1_fp32vec4 = vec_mul(x1_fp32vec4, c_fp32vec4);
- y2_fp32vec4 = vec_mul(x2_fp32vec4, c_fp32vec4);
- y3_fp32vec4 = vec_mul(x3_fp32vec4, c_fp32vec4);
- y4_fp32vec4 = vec_mul(x4_fp32vec4, c_fp32vec4);
- y5_fp32vec4 = vec_mul(x5_fp32vec4, c_fp32vec4);
- y6_fp32vec4 = vec_mul(x6_fp32vec4, c_fp32vec4);
- y7_fp32vec4 = vec_mul(x7_fp32vec4, c_fp32vec4);
- y8_fp32vec4 = vec_mul(x8_fp32vec4, c_fp32vec4);
- y9_fp32vec4 = vec_mul(x9_fp32vec4, c_fp32vec4);
- y10_fp32vec4 = vec_mul(x10_fp32vec4, c_fp32vec4);
- y11_fp32vec4 = vec_mul(x11_fp32vec4, c_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, y+(i ));
- vec_xst(y1_fp32vec4, 0, y+(i+4 ));
- vec_xst(y2_fp32vec4, 0, y+(i+8 ));
- vec_xst(y3_fp32vec4, 0, y+(i+12));
- vec_xst(y4_fp32vec4, 0, y+(i+16));
- vec_xst(y5_fp32vec4, 0, y+(i+20));
- vec_xst(y6_fp32vec4, 0, y+(i+24));
- vec_xst(y7_fp32vec4, 0, y+(i+28));
- vec_xst(y8_fp32vec4, 0, y+(i+32));
- vec_xst(y9_fp32vec4, 0, y+(i+36));
- vec_xst(y10_fp32vec4, 0, y+(i+40));
- vec_xst(y11_fp32vec4, 0, y+(i+44));
- }
- for (; i <= n-16; i += 16)
- {
- x0_fp32vec4 = vec_xl(0, x+(i ));
- x1_fp32vec4 = vec_xl(0, x+(i+4 ));
- x2_fp32vec4 = vec_xl(0, x+(i+8 ));
- x3_fp32vec4 = vec_xl(0, x+(i+12));
-
- y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4);
- y1_fp32vec4 = vec_mul(x1_fp32vec4, c_fp32vec4);
- y2_fp32vec4 = vec_mul(x2_fp32vec4, c_fp32vec4);
- y3_fp32vec4 = vec_mul(x3_fp32vec4, c_fp32vec4);
-
- vec_xst(y0_fp32vec4, 0, y+(i ));
- vec_xst(y1_fp32vec4, 0, y+(i+4 ));
- vec_xst(y2_fp32vec4, 0, y+(i+8 ));
- vec_xst(y3_fp32vec4, 0, y+(i+12));
- }
- for (; i <= n-4; i += 4)
- {
- x0_fp32vec4 = vec_xl(0, x+(i ));
- y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4);
- vec_xst(y0_fp32vec4, 0, y+(i ));
- }
- for (; i < n; i++)
- y[i] = c * x[i];
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_cdiv_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_cdiv_VSX(float *z, const float *x, const float *y, const ptrdiff_t n)
-{
-    // Element-wise quotient: z[k] = x[k] / y[k] for every k in [0, n).
-    //
-    // The range is consumed in three vector passes of shrinking width
-    // (48, then 16, then 4 floats per iteration; each vector holds 4 floats)
-    // and a scalar loop handles whatever is left.  Inside one iteration every
-    // operand is loaded before any result is stored.
-    vector float numer[12];
-    vector float denom[12];
-    ptrdiff_t pos = 0;
-    int v;
-
-    // 12 vectors (48 floats) per iteration.
-    while (pos <= n-48)
-    {
-        for (v = 0; v < 12; v++)
-            denom[v] = vec_xl(0, y+(pos+4*v));
-        for (v = 0; v < 12; v++)
-            numer[v] = vec_xl(0, x+(pos+4*v));
-        for (v = 0; v < 12; v++)
-            denom[v] = vec_div(numer[v], denom[v]);
-        for (v = 0; v < 12; v++)
-            vec_xst(denom[v], 0, z+(pos+4*v));
-        pos += 48;
-    }
-
-    // 4 vectors (16 floats) per iteration.
-    while (pos <= n-16)
-    {
-        for (v = 0; v < 4; v++)
-            denom[v] = vec_xl(0, y+(pos+4*v));
-        for (v = 0; v < 4; v++)
-            numer[v] = vec_xl(0, x+(pos+4*v));
-        for (v = 0; v < 4; v++)
-            denom[v] = vec_div(numer[v], denom[v]);
-        for (v = 0; v < 4; v++)
-            vec_xst(denom[v], 0, z+(pos+4*v));
-        pos += 16;
-    }
-
-    // One vector (4 floats) per iteration.
-    while (pos <= n-4)
-    {
-        denom[0] = vec_xl(0, y+pos);
-        numer[0] = vec_xl(0, x+pos);
-        denom[0] = vec_div(numer[0], denom[0]);
-        vec_xst(denom[0], 0, z+pos);
-        pos += 4;
-    }
-
-    // Scalar tail for the last (fewer than 4) elements.
-    while (pos < n)
-    {
-        z[pos] = x[pos] / y[pos];
-        pos++;
-    }
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// THFloatVector_divs_VSX:
-//--------------------------------------------------------------------------------------------------
-static void THFloatVector_divs_VSX(float *y, const float*x, const float c, const ptrdiff_t n)
-{
-    // Divide every element by a scalar: y[k] = x[k] / c for every k in [0, n).
-    //
-    // The range is consumed in three vector passes of shrinking width
-    // (48, then 16, then 4 floats per iteration; each vector holds 4 floats)
-    // and a scalar loop handles whatever is left.  Inside one iteration every
-    // input is loaded before any result is stored.
-    //
-    // Fix: the 16-wide pass used to store its four results a second time, and
-    // the last of those duplicate stores targeted y+(i+16) instead of
-    // y+(i+12).  That wrote outside the 16 elements owned by the iteration:
-    // it could run past the end of y when fewer than 20 elements remained and
-    // it corrupted not-yet-read inputs when y and x are the same buffer.
-    // Each result is now stored exactly once, at its own offset.
-    ptrdiff_t i;
-    int v;
-
-    // Broadcast the divisor into all four lanes of a vector.
-    float val[4] = {c, c, c, c};
-    vector float c_fp32vec4 = vec_xl(0, val);
-
-    vector float x_fp32vec4[12];
-    vector float y_fp32vec4[12];
-
-    // 12 vectors (48 floats) per iteration.
-    for (i = 0; i <= n-48; i += 48)
-    {
-        for (v = 0; v < 12; v++)
-            x_fp32vec4[v] = vec_xl(0, x+(i+4*v));
-        for (v = 0; v < 12; v++)
-            y_fp32vec4[v] = vec_div(x_fp32vec4[v], c_fp32vec4);
-        for (v = 0; v < 12; v++)
-            vec_xst(y_fp32vec4[v], 0, y+(i+4*v));
-    }
-
-    // 4 vectors (16 floats) per iteration.
-    for (; i <= n-16; i += 16)
-    {
-        for (v = 0; v < 4; v++)
-            x_fp32vec4[v] = vec_xl(0, x+(i+4*v));
-        for (v = 0; v < 4; v++)
-            y_fp32vec4[v] = vec_div(x_fp32vec4[v], c_fp32vec4);
-        for (v = 0; v < 4; v++)
-            vec_xst(y_fp32vec4[v], 0, y+(i+4*v));
-    }
-
-    // One vector (4 floats) per iteration.
-    for (; i <= n-4; i += 4)
-    {
-        x_fp32vec4[0] = vec_xl(0, x+(i ));
-        y_fp32vec4[0] = vec_div(x_fp32vec4[0], c_fp32vec4);
-        vec_xst(y_fp32vec4[0], 0, y+(i ));
-    }
-
-    // Scalar tail for the last (fewer than 4) elements.
-    for (; i < n; i++)
-        y[i] = x[i] / c;
-}
-
-
-//------------------------------------------------
-//
-// Testing for correctness and performance
-//
-// If you want to run these tests, compile this
-// file with -DRUN_VSX_TESTS on a Power machine,
-// and then run the executable that is generated.
-//
-//------------------------------------------------
-//
-// Example passing run (from a Power8 machine):
-//
-// $ gcc VSX.c -O2 -D RUN_VSX_TESTS -o vsxtest
-// $ ./vsxtest
-//
-// TODO
-//
-//
-// Finished runnning all tests. All tests PASSED.
-//
-//------------------------------------------------
-#ifdef RUN_VSX_TESTS
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <assert.h>
-#include <math.h>
-
-#define VSX_PERF_NUM_TEST_ELEMENTS 100000000
-#define VSX_FUNC_NUM_TEST_ELEMENTS 2507
-
-
-//--------------------------------------------------------------------------------------------------
-// Standard implementations:
-//--------------------------------------------------------------------------------------------------
-static void standardDouble_fill(double *x, const double c, const ptrdiff_t n)
-{
-    // Scalar reference fill: stores c into x[0] .. x[n-1]; no-op when n <= 0.
-    ptrdiff_t remaining;
-
-    for (remaining = n; remaining > 0; --remaining)
-        *x++ = c;
-}
-
-static void standardFloat_fill(float *x, const float c, const ptrdiff_t n)
-{
-    // Scalar reference fill: stores c into x[0] .. x[n-1]; no-op when n <= 0.
-    ptrdiff_t remaining;
-
-    for (remaining = n; remaining > 0; --remaining)
-        *x++ = c;
-}
-
-static void standardDouble_cadd(double *z, const double *x, const double *y, const double c, const ptrdiff_t n)
-{
-    // Scalar reference for the scaled add: z[k] = x[k] + c * y[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        z[k] = x[k] + c * y[k];
-        ++k;
-    }
-}
-
-static void standardFloat_cadd(float *z, const float *x, const float *y, const float c, const ptrdiff_t n)
-{
-    // Scalar reference for the scaled add: z[k] = x[k] + c * y[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        z[k] = x[k] + c * y[k];
-        ++k;
-    }
-}
-
-static void standardDouble_adds(double *y, const double *x, const double c, const ptrdiff_t n)
-{
-    // Scalar reference for adding a constant: y[k] = c + x[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        y[k] = c + x[k];
-        ++k;
-    }
-}
-
-static void standardFloat_adds(float *y, const float *x, const float c, const ptrdiff_t n)
-{
-    // Scalar reference for adding a constant: y[k] = c + x[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        y[k] = c + x[k];
-        ++k;
-    }
-}
-
-static void standardDouble_cmul(double *z, const double *x, const double *y, const ptrdiff_t n)
-{
-    // Scalar reference for the element-wise product: z[k] = x[k] * y[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        z[k] = x[k] * y[k];
-        ++k;
-    }
-}
-
-static void standardFloat_cmul(float *z, const float *x, const float *y, const ptrdiff_t n)
-{
-    // Scalar reference for the element-wise product: z[k] = x[k] * y[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        z[k] = x[k] * y[k];
-        ++k;
-    }
-}
-
-static void standardDouble_muls(double *y, const double *x, const double c, const ptrdiff_t n)
-{
-    // Scalar reference for scaling by a constant: y[k] = c * x[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        y[k] = c * x[k];
-        ++k;
-    }
-}
-
-static void standardFloat_muls(float *y, const float *x, const float c, const ptrdiff_t n)
-{
-    // Scalar reference for scaling by a constant: y[k] = c * x[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        y[k] = c * x[k];
-        ++k;
-    }
-}
-
-static void standardDouble_cdiv(double *z, const double *x, const double *y, const ptrdiff_t n)
-{
-    // Scalar reference for the element-wise quotient: z[k] = x[k] / y[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        z[k] = x[k] / y[k];
-        ++k;
-    }
-}
-
-static void standardFloat_cdiv(float *z, const float *x, const float *y, const ptrdiff_t n)
-{
-    // Scalar reference for the element-wise quotient: z[k] = x[k] / y[k], k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        z[k] = x[k] / y[k];
-        ++k;
-    }
-}
-
-static void standardDouble_divs(double *y, const double *x, const double c, const ptrdiff_t n)
-{
-    // Scalar reference for dividing by a constant: y[k] = x[k] / c, k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        y[k] = x[k] / c;
-        ++k;
-    }
-}
-
-static void standardFloat_divs(float *y, const float *x, const float c, const ptrdiff_t n)
-{
-    // Scalar reference for dividing by a constant: y[k] = x[k] / c, k in [0, n).
-    ptrdiff_t k = 0;
-
-    while (k < n)
-    {
-        y[k] = x[k] / c;
-        ++k;
-    }
-}
-
-// Produces a pseudo-random test value: (rand()%100) divided by (rand()%100),
-// both converted to double first, multiplied by -1.0 or 1.0 depending on a
-// third rand() call.
-// NOTE(review): the divisor can be 0, so the result is presumably +/-infinity
-// or NaN in those cases - confirm the callers are meant to see such values.
-// NOTE(review): all three rand() calls sit in one expression, so C does not
-// fix the order in which they are evaluated.
-double randDouble()
-{
- return (double)(rand()%100)/(double)(rand()%100) * (rand()%2 ? -1.0 : 1.0);
-}
-
-// Tolerant equality used by the correctness checks.  Returns 1 when a and b
-// count as "near" each other and 0 otherwise:
-//   - two NaNs are near each other; a NaN is never near a non-NaN;
-//   - two infinities are near only when they have the same sign; an
-//     infinity is never near a finite value;
-//   - two finite values (zero, subnormal or normal) are near when
-//     |a - b| < 0.001.
-//
-// Fix: the previous version first required fpclassify(a) == fpclassify(b),
-// so a zero (or subnormal) result was reported as different from a tiny
-// normal value even though they are well inside the tolerance.  It also
-// compared raw signbit() results, which are only specified to be nonzero
-// for negative values, not to be equal to each other.
-int near(double a, double b)
-{
-    if (isnan(a) || isnan(b))
-        return isnan(a) && isnan(b);
-
-    if (isinf(a) || isinf(b))
-        return isinf(a) && isinf(b) && (!signbit(a) == !signbit(b));
-
-    // Both values are finite: compare by magnitude of the difference.
-    return fabs(a - b) < 0.001;
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// Standard tests:
-//--------------------------------------------------------------------------------------------------
-// Times THDoubleVector_fill_VSX() against standardDouble_fill() on
-// VSX_PERF_NUM_TEST_ELEMENTS doubles, then asserts that both leave identical
-// contents in their buffers for several offset/length combinations.
-void test_THDoubleVector_fill_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    double *x_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *x_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(x_standard != NULL);
-    assert(x_optimized != NULL);
-
-    double yVal0 = 17.2;
-    double yVal1 = 8.2;
-    double yVal2 = 5.1;
-    double yVal3 = -0.9;
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardDouble_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardDouble_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardDouble_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardDouble_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardDouble_fill() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THDoubleVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THDoubleVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THDoubleVector_fill_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    yVal0 += 1.0;
-    yVal1 += 1.0;
-    yVal2 += 1.0;
-    yVal3 -= 1.0;
-
-    standardDouble_fill(    x_standard,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);
-    THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-        assert(x_optimized[i] == yVal0);
-
-    // Overlapping fills at different start offsets and lengths, applied
-    // identically to both buffers.
-    standardDouble_fill(    x_standard+1,    yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_fill_VSX(x_optimized+1,   yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardDouble_fill(    x_standard+2,    yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THDoubleVector_fill_VSX(x_optimized+2,   yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardDouble_fill(    x_standard+3,    yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THDoubleVector_fill_VSX(x_optimized+3,   yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardDouble_fill(    x_standard+517,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THDoubleVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardDouble_fill(    x_standard+517+r,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THDoubleVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-        assert(x_optimized[i] == x_standard[i]);
-    printf("All assertions PASSED for THDoubleVector_fill_VSX() test.\n\n");
-
-
-    free(x_standard);
-    free(x_optimized);
-}
-
-
-// Times THFloatVector_fill_VSX() against standardFloat_fill() on
-// VSX_PERF_NUM_TEST_ELEMENTS floats, then asserts that both leave identical
-// contents in their buffers for several offset/length combinations.
-void test_THFloatVector_fill_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    float *x_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *x_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(x_standard != NULL);
-    assert(x_optimized != NULL);
-
-    float yVal0 = 17.2;
-    float yVal1 = 8.2;
-    float yVal2 = 5.1;
-    float yVal3 = -0.9;
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardFloat_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardFloat_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardFloat_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardFloat_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardFloat_fill() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THFloatVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THFloatVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THFloatVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THFloatVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THFloatVector_fill_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    yVal0 += 1.0;
-    yVal1 += 1.0;
-    yVal2 += 1.0;
-    yVal3 -= 1.0;
-
-    standardFloat_fill(    x_standard,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);
-    THFloatVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-        assert(x_optimized[i] == yVal0);
-
-    // Overlapping fills at different start offsets and lengths, applied
-    // identically to both buffers.
-    standardFloat_fill(    x_standard+1,    yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THFloatVector_fill_VSX(x_optimized+1,   yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardFloat_fill(    x_standard+2,    yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THFloatVector_fill_VSX(x_optimized+2,   yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardFloat_fill(    x_standard+3,    yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THFloatVector_fill_VSX(x_optimized+3,   yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardFloat_fill(    x_standard+517,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THFloatVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardFloat_fill(    x_standard+517+r,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THFloatVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-        assert(x_optimized[i] == x_standard[i]);
-    printf("All assertions PASSED for THFloatVector_fill_VSX() test.\n\n");
-
-
-    free(x_standard);
-    free(x_optimized);
-}
-
-
-// Times THDoubleVector_cadd_VSX() against standardDouble_cadd() on
-// VSX_PERF_NUM_TEST_ELEMENTS doubles of random input, then asserts (via
-// near()) that both produce matching output for several offset/length
-// combinations.
-//
-// Fix: the y input buffer is now released as well (it used to leak), and the
-// four large allocations are checked before use.
-void test_THDoubleVector_cadd_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    double *z_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double c = randDouble();
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(z_standard != NULL);
-    assert(z_optimized != NULL);
-    assert(x != NULL);
-    assert(y != NULL);
-
-    // Initialize randomly
-    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
-    {
-        x[i] = randDouble();
-        y[i] = randDouble();
-    }
-
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardDouble_cadd() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THDoubleVector_cadd_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    standardDouble_cadd(    z_standard+1,    x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_cadd_VSX(z_optimized+1,   x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardDouble_cadd(    z_standard+2,    x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THDoubleVector_cadd_VSX(z_optimized+2,   x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardDouble_cadd(    z_standard+3,    x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THDoubleVector_cadd_VSX(z_optimized+3,   x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardDouble_cadd(    z_standard+517,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THDoubleVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardDouble_cadd(    z_standard+517+r,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THDoubleVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-    {
-        // Print the offending element before the assertion aborts the run.
-        if(!near(z_optimized[i], z_standard[i]))
-            printf("%d %f %f\n", i, z_optimized[i], z_standard[i]);
-        assert(near(z_optimized[i], z_standard[i]));
-    }
-    printf("All assertions PASSED for THDoubleVector_cadd_VSX() test.\n\n");
-
-
-    free(z_standard);
-    free(z_optimized);
-    free(x);
-    free(y);
-}
-
-// Times THFloatVector_cadd_VSX() against standardFloat_cadd() on
-// VSX_PERF_NUM_TEST_ELEMENTS floats of random input, then asserts (via
-// near()) that both produce matching output for several offset/length
-// combinations.
-//
-// Fix: the y input buffer is now released as well (it used to leak), and the
-// four large allocations are checked before use.
-void test_THFloatVector_cadd_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    float *z_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float c = (float)randDouble();
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(z_standard != NULL);
-    assert(z_optimized != NULL);
-    assert(x != NULL);
-    assert(y != NULL);
-
-    // Initialize randomly
-    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
-    {
-        x[i] = (float)randDouble();
-        y[i] = (float)randDouble();
-    }
-
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardFloat_cadd() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THFloatVector_cadd_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    standardFloat_cadd(    z_standard+1,    x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THFloatVector_cadd_VSX(z_optimized+1,   x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardFloat_cadd(    z_standard+2,    x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THFloatVector_cadd_VSX(z_optimized+2,   x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardFloat_cadd(    z_standard+3,    x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THFloatVector_cadd_VSX(z_optimized+3,   x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardFloat_cadd(    z_standard+517,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THFloatVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardFloat_cadd(    z_standard+517+r,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THFloatVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-    {
-        // Print the offending element before the assertion aborts the run.
-        if(!near(z_optimized[i], z_standard[i]))
-            printf("%d %f %f\n", i, z_optimized[i], z_standard[i]);
-        assert(near(z_optimized[i], z_standard[i]));
-    }
-    printf("All assertions PASSED for THFloatVector_cadd_VSX() test.\n\n");
-
-
-    free(z_standard);
-    free(z_optimized);
-    free(x);
-    free(y);
-}
-
-// Times THDoubleVector_adds_VSX() against standardDouble_adds() on
-// VSX_PERF_NUM_TEST_ELEMENTS doubles of random input, then asserts (via
-// near()) that both produce matching output for several offset/length
-// combinations.
-void test_THDoubleVector_adds_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    double *y_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double c = randDouble();
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(y_standard != NULL);
-    assert(y_optimized != NULL);
-    assert(x != NULL);
-
-    // Initialize randomly
-    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
-        x[i] = randDouble();
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardDouble_adds() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THDoubleVector_adds_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    standardDouble_adds(    y_standard+1,    x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_adds_VSX(y_optimized+1,   x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardDouble_adds(    y_standard+2,    x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THDoubleVector_adds_VSX(y_optimized+2,   x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardDouble_adds(    y_standard+3,    x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THDoubleVector_adds_VSX(y_optimized+3,   x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardDouble_adds(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THDoubleVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardDouble_adds(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THDoubleVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-    {
-        // Print the offending element before the assertion aborts the run.
-        if(!near(y_optimized[i], y_standard[i]))
-            printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
-        assert(near(y_optimized[i], y_standard[i]));
-    }
-    printf("All assertions PASSED for THDoubleVector_adds_VSX() test.\n\n");
-
-
-    free(y_standard);
-    free(y_optimized);
-    free(x);
-}
-
-
-// Times THFloatVector_adds_VSX() against standardFloat_adds() on
-// VSX_PERF_NUM_TEST_ELEMENTS floats of random input, then asserts (via
-// near()) that both produce matching output for several offset/length
-// combinations.
-void test_THFloatVector_adds_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    float *y_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float c = (float)randDouble();
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(y_standard != NULL);
-    assert(y_optimized != NULL);
-    assert(x != NULL);
-
-    // Initialize randomly
-    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
-        x[i] = (float)randDouble();
-
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardFloat_adds() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THFloatVector_adds_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    standardFloat_adds(    y_standard+1,    x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THFloatVector_adds_VSX(y_optimized+1,   x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardFloat_adds(    y_standard+2,    x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THFloatVector_adds_VSX(y_optimized+2,   x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardFloat_adds(    y_standard+3,    x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THFloatVector_adds_VSX(y_optimized+3,   x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardFloat_adds(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THFloatVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardFloat_adds(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THFloatVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-    {
-        // Print the offending element before the assertion aborts the run.
-        if(!near(y_optimized[i], y_standard[i]))
-            printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
-        assert(near(y_optimized[i], y_standard[i]));
-    }
-    printf("All assertions PASSED for THFloatVector_adds_VSX() test.\n\n");
-
-
-    free(y_standard);
-    free(y_optimized);
-    free(x);
-}
-
-
-// Times THDoubleVector_cmul_VSX() against standardDouble_cmul() on
-// VSX_PERF_NUM_TEST_ELEMENTS doubles of random input, then asserts (via
-// near()) that both produce matching output for several offset/length
-// combinations.
-//
-// Fix: the y input buffer is now released as well (it used to leak), and the
-// four large allocations are checked before use.
-void test_THDoubleVector_cmul_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    double *z_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-    double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(z_standard != NULL);
-    assert(z_optimized != NULL);
-    assert(x != NULL);
-    assert(y != NULL);
-
-    // Initialize randomly
-    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
-    {
-        x[i] = randDouble();
-        y[i] = randDouble();
-    }
-
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardDouble_cmul() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THDoubleVector_cmul_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    standardDouble_cmul(    z_standard+1,    x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THDoubleVector_cmul_VSX(z_optimized+1,   x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardDouble_cmul(    z_standard+2,    x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THDoubleVector_cmul_VSX(z_optimized+2,   x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardDouble_cmul(    z_standard+3,    x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THDoubleVector_cmul_VSX(z_optimized+3,   x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardDouble_cmul(    z_standard+517,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THDoubleVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardDouble_cmul(    z_standard+517+r,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THDoubleVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-    {
-        // Print the offending element before the assertion aborts the run.
-        if(!near(z_optimized[i], z_standard[i]))
-            printf("%d %f %f\n", i, z_optimized[i], z_standard[i]);
-        assert(near(z_optimized[i], z_standard[i]));
-    }
-    printf("All assertions PASSED for THDoubleVector_cmul_VSX() test.\n\n");
-
-
-    free(z_standard);
-    free(z_optimized);
-    free(x);
-    free(y);
-}
-
-// Times THFloatVector_cmul_VSX() against standardFloat_cmul() on
-// VSX_PERF_NUM_TEST_ELEMENTS floats of random input, then asserts (via
-// near()) that both produce matching output for several offset/length
-// combinations.
-//
-// Fix: the y input buffer is now released as well (it used to leak), and the
-// four large allocations are checked before use.
-void test_THFloatVector_cmul_VSX()
-{
-    clock_t start, end;
-    double elapsedSeconds_optimized, elapsedSeconds_standard;
-
-    float *z_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-    float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-
-    // The buffers are large; stop here instead of writing through NULL if an
-    // allocation was refused.
-    assert(z_standard != NULL);
-    assert(z_optimized != NULL);
-    assert(x != NULL);
-    assert(y != NULL);
-
-    // Initialize randomly
-    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
-    {
-        x[i] = (float)randDouble();
-        y[i] = (float)randDouble();
-    }
-
-
-    //-------------------------------------------------
-    // Performance Test
-    //-------------------------------------------------
-    start = clock();
-    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );
-    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("standardFloat_cmul() test took %.5lf seconds\n", elapsedSeconds_standard);
-
-    start = clock();
-    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );
-    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
-    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
-    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
-    end = clock();
-
-    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
-    printf("THFloatVector_cmul_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
-    //-------------------------------------------------
-    // Correctness Test
-    //-------------------------------------------------
-    standardFloat_cmul(    z_standard+1,    x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    THFloatVector_cmul_VSX(z_optimized+1,   x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
-    standardFloat_cmul(    z_standard+2,    x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    THFloatVector_cmul_VSX(z_optimized+2,   x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
-    standardFloat_cmul(    z_standard+3,    x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    THFloatVector_cmul_VSX(z_optimized+3,   x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
-    standardFloat_cmul(    z_standard+517,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    THFloatVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
-    int r = rand() % 258;
-    standardFloat_cmul(    z_standard+517+r,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    THFloatVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
-    {
-        // Print the offending element before the assertion aborts the run.
-        if(!near(z_optimized[i], z_standard[i]))
-            printf("%d %f %f\n", i, z_optimized[i], z_standard[i]);
-        assert(near(z_optimized[i], z_standard[i]));
-    }
-    printf("All assertions PASSED for THFloatVector_cmul_VSX() test.\n\n");
-
-
-    free(z_standard);
-    free(z_optimized);
-    free(x);
-    free(y);
-}
-
-void test_THDoubleVector_muls_VSX()
-{
- clock_t start, end;
- double elapsedSeconds_optimized, elapsedSeconds_standard;
-
- double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double c = randDouble();
-
- // Initialize randomly
- for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
- {
- x[i] = randDouble();
- }
-
-
- //-------------------------------------------------
- // Performance Test
- //-------------------------------------------------
- start = clock();
- standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
- printf("standardDouble_muls() test took %.5lf seconds\n", elapsedSeconds_standard);
-
- start = clock();
- THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
- printf("THDoubleVector_muls_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
- //-------------------------------------------------
- // Correctness Test
- //-------------------------------------------------
- standardDouble_muls( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- THDoubleVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- standardDouble_muls( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- THDoubleVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- standardDouble_muls( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- THDoubleVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- standardDouble_muls( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- THDoubleVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- int r = rand() % 258;
- standardDouble_muls( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- THDoubleVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-
- for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
- {
- if(!near(y_optimized[i], y_standard[i]))
- printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
- assert(near(y_optimized[i], y_standard[i]));
- }
- printf("All assertions PASSED for THDoubleVector_muls_VSX() test.\n\n");
-
-
- free(y_standard);
- free(y_optimized);
- free(x);
-}
-
-void test_THFloatVector_muls_VSX()
-{
- clock_t start, end;
- double elapsedSeconds_optimized, elapsedSeconds_standard;
-
- float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float c = (float)randDouble();
-
- // Initialize randomly
- for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
- {
- x[i] = (float)randDouble();
- }
-
-
- //-------------------------------------------------
- // Performance Test
- //-------------------------------------------------
- start = clock();
- standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
- printf("standardFloat_muls() test took %.5lf seconds\n", elapsedSeconds_standard);
-
- start = clock();
- THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
- printf("THFloatVector_muls_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
- //-------------------------------------------------
- // Correctness Test
- //-------------------------------------------------
- standardFloat_muls( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- THFloatVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- standardFloat_muls( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- THFloatVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- standardFloat_muls( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- THFloatVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- standardFloat_muls( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- THFloatVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- int r = rand() % 258;
- standardFloat_muls( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- THFloatVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
- {
- if(!near(y_optimized[i], y_standard[i]))
- printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
- assert(near(y_optimized[i], y_standard[i]));
- }
- printf("All assertions PASSED for THFloatVector_muls_VSX() test.\n\n");
-
-
- free(y_standard);
- free(y_optimized);
- free(x);
-}
-
-
-
-void test_THDoubleVector_cdiv_VSX()
-{
- clock_t start, end;
- double elapsedSeconds_optimized, elapsedSeconds_standard;
-
- double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
-
- // Initialize randomly
- for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
- {
- x[i] = randDouble();
- y[i] = randDouble();
- }
-
-
- //-------------------------------------------------
- // Performance Test
- //-------------------------------------------------
- start = clock();
- standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS );
- standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
- standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
- standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
- printf("standardDouble_cdiv() test took %.5lf seconds\n", elapsedSeconds_standard);
-
- start = clock();
- THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS );
- THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
- THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
- THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
- printf("THDoubleVector_cdiv_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
- //-------------------------------------------------
- // Correctness Test
- //-------------------------------------------------
- standardDouble_cdiv( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- THDoubleVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- standardDouble_cdiv( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- THDoubleVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- standardDouble_cdiv( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- THDoubleVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- standardDouble_cdiv( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- THDoubleVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- int r = rand() % 258;
- standardDouble_cdiv( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- THDoubleVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
- {
- if(!near(z_optimized[i], z_standard[i]))
- printf("%d %f %f\n", i, z_optimized[i], z_standard[i]);
- assert(near(z_optimized[i], z_standard[i]));
- }
- printf("All assertions PASSED for THDoubleVector_cdiv_VSX() test.\n\n");
-
-
- free(z_standard);
- free(z_optimized);
- free(x);
-}
-
-void test_THFloatVector_cdiv_VSX()
-{
- clock_t start, end;
- double elapsedSeconds_optimized, elapsedSeconds_standard;
-
- float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
-
- // Initialize randomly
- for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
- {
- x[i] = (float)randDouble();
- y[i] = (float)randDouble();
- }
-
-
- //-------------------------------------------------
- // Performance Test
- //-------------------------------------------------
- start = clock();
- standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS );
- standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
- standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
- standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
- printf("standardFloat_cdiv() test took %.5lf seconds\n", elapsedSeconds_standard);
-
- start = clock();
- THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS );
- THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);
- THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);
- THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
- printf("THFloatVector_cdiv_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
- //-------------------------------------------------
- // Correctness Test
- //-------------------------------------------------
- standardFloat_cdiv( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- THFloatVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- standardFloat_cdiv( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- THFloatVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- standardFloat_cdiv( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- THFloatVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- standardFloat_cdiv( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- THFloatVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- int r = rand() % 258;
- standardFloat_cdiv( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- THFloatVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
- {
- if(!near(z_optimized[i], z_standard[i]))
- printf("%d %f %f\n", i, z_optimized[i], z_standard[i]);
- assert(near(z_optimized[i], z_standard[i]));
- }
- printf("All assertions PASSED for THFloatVector_cdiv_VSX() test.\n\n");
-
-
- free(z_standard);
- free(z_optimized);
- free(x);
-}
-
-void test_THDoubleVector_divs_VSX()
-{
- clock_t start, end;
- double elapsedSeconds_optimized, elapsedSeconds_standard;
-
- double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));
- double c = randDouble();
-
- // Initialize randomly
- for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
- {
- x[i] = randDouble();
- }
-
-
- //-------------------------------------------------
- // Performance Test
- //-------------------------------------------------
- start = clock();
- standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
- printf("standardDouble_divs() test took %.5lf seconds\n", elapsedSeconds_standard);
-
- start = clock();
- THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
- printf("THDoubleVector_divs_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
- //-------------------------------------------------
- // Correctness Test
- //-------------------------------------------------
- standardDouble_divs( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- THDoubleVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- standardDouble_divs( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- THDoubleVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- standardDouble_divs( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- THDoubleVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- standardDouble_divs( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- THDoubleVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- int r = rand() % 258;
- standardDouble_divs( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- THDoubleVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-
- for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
- {
- if(!near(y_optimized[i], y_standard[i]))
- printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
- assert(near(y_optimized[i], y_standard[i]));
- }
- printf("All assertions PASSED for THDoubleVector_divs_VSX() test.\n\n");
-
-
- free(y_standard);
- free(y_optimized);
- free(x);
-}
-
-void test_THFloatVector_divs_VSX()
-{
- clock_t start, end;
- double elapsedSeconds_optimized, elapsedSeconds_standard;
-
- float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));
- float c = (float)randDouble();
-
- // Initialize randomly
- for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)
- {
- x[i] = (float)randDouble();
- }
-
-
- //-------------------------------------------------
- // Performance Test
- //-------------------------------------------------
- start = clock();
- standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;
- printf("standardFloat_divs() test took %.5lf seconds\n", elapsedSeconds_standard);
-
- start = clock();
- THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS );
- THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);
- THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);
- THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);
- end = clock();
-
- elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;
- printf("THFloatVector_divs_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized);
-
-
- //-------------------------------------------------
- // Correctness Test
- //-------------------------------------------------
- standardFloat_divs( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- THFloatVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);
- standardFloat_divs( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- THFloatVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);
- standardFloat_divs( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- THFloatVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);
- standardFloat_divs( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- THFloatVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);
- int r = rand() % 258;
- standardFloat_divs( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
- THFloatVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));
-
- for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)
- {
- if(!near(y_optimized[i], y_standard[i]))
- printf("%d %f %f\n", i, y_optimized[i], y_standard[i]);
- assert(near(y_optimized[i], y_standard[i]));
- }
- printf("All assertions PASSED for THFloatVector_divs_VSX() test.\n\n");
-
-
- free(y_standard);
- free(y_optimized);
- free(x);
-}
-
-
-//--------------------------------------------------------------------------------------------------
-// Run tests:
-//--------------------------------------------------------------------------------------------------
-int main()
-{
- printf("\n");
-
-
- // First test utility functions
-
- assert(!near(0.1, -0.1));
- assert(!near(0.1f, -0.1f));
- assert(!near(9, 10));
- assert(near(0.1, 0.1000001));
- assert(near(0.1f, 0.1000001f));
- assert(near(100.764, 100.764));
- assert(!near(NAN, 0.0));
- assert(!near(-9.5, NAN));
- assert(!near(NAN, 100));
- assert(!near(-0.0, NAN));
- assert(near(NAN, NAN));
- assert(near(INFINITY, INFINITY));
- assert(near(-INFINITY, -INFINITY));
- assert(!near(INFINITY, NAN));
- assert(!near(0, INFINITY));
- assert(!near(-999.4324, INFINITY));
- assert(!near(INFINITY, 982374.1));
- assert(!near(-INFINITY, INFINITY));
-
-
-
- // Then test each vectorized function
-
- test_THDoubleVector_fill_VSX();
- test_THFloatVector_fill_VSX();
-
- test_THDoubleVector_cadd_VSX();
- test_THFloatVector_cadd_VSX();
-
- test_THDoubleVector_adds_VSX();
- test_THFloatVector_adds_VSX();
-
- test_THDoubleVector_cmul_VSX();
- test_THFloatVector_cmul_VSX();
-
- test_THDoubleVector_muls_VSX();
- test_THFloatVector_muls_VSX();
-
- test_THDoubleVector_cdiv_VSX();
- test_THFloatVector_cdiv_VSX();
-
- test_THDoubleVector_divs_VSX();
- test_THFloatVector_divs_VSX();
-
-
-
- printf("Finished runnning all tests. All tests PASSED.\n");
- return 0;
-}
-
-
-#endif // defined RUN_VSX_TESTS
-
-#endif // defined __PPC64__
-
diff --git a/contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt b/contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt
deleted file mode 100644
index 518c407f2..000000000
--- a/contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# avoid some cmake warnings
-
-INCLUDE_DIRECTORIES(${LUA_INCDIR})
-IF(LUALIB)
- LINK_DIRECTORIES(${LUA_LIBDIR}) # note: must be done before defining target
-ENDIF()
-
-ADD_LIBRARY(luaT STATIC luaT.h luaT.c)
-
-IF(LUALIB)
- TARGET_LINK_LIBRARIES(luaT ${LUALIB}) # must be done after ;)
-ENDIF()
diff --git a/contrib/lua-torch/torch7/lib/luaT/README.md b/contrib/lua-torch/torch7/lib/luaT/README.md
deleted file mode 100644
index 235b8edc0..000000000
--- a/contrib/lua-torch/torch7/lib/luaT/README.md
+++ /dev/null
@@ -1,266 +0,0 @@
-<a name="luat.dok"></a>
-# Lua Torch C API #
-
-luaT provides an API to interface Lua and C in Torch packages. It defines a
-concept of _classes_ to Lua for Torch, and provides a mechanism to easily
-handle these Lua classes from C.
-
-It additionally provides few functions that `luaL` should have defined, and
-defines several functions similar to `luaL` ones for better type error printing when using
-`luaT` classes.
-
-<a name="luat.memory.dok"></a>
-## Memory functions ##
-
-Classical memory allocation functions which generate a Lua error in case of
-problem.
-
-<a name="luaT_alloc"></a>
-### void* luaT_alloc(lua_State *L, long size) ###
-
-Allocates `size` bytes, and return a pointer on the allocated
-memory. A Lua error will be generated if running out of memory.
-
-<a name="luaT_realloc"></a>
-### void* luaT_realloc(lua_State *L, void *ptr, long size) ###
-
-Realloc `ptr` to `size` bytes. `ptr` must have been previously
-allocated with [luaT_alloc](#luaT_alloc) or
-[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc`
-functions. A Lua error will be generated if running out of memory.
-
-<a name="luaT_free"></a>
-### void luaT_free(lua_State *L, void *ptr) ###
-
-Free memory allocated at address `ptr`. The memory must have been
-previously allocated with [luaT_alloc](#luaT_alloc) or
-[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc`
-functions.
-
-<a name="luat.classcreate"></a>
-## Class creation and basic handling ##
-
-A `luaT` class is basically either a Lua _table_ or _userdata_ with
-an appropriate _metatable_. This appropriate metatable is created with
-[luaT_newmetatable](#luaT_newmetatable). Contrary to luaL userdata
-functions, luaT mechanism handles inheritance. If the class inherit from
-another class, then the metatable will itself have a metatable
-corresponding to the _parent metatable_: the metatables are cascaded
-according to the class inheritance. Multiple inheritance is not supported.
-
-<a name="luat.operatoroverloading"></a>
-### Operator overloading ###
-
-The metatable of a `luaT` object contains `Lua` operators like
-`__index`, `__newindex`, `__tostring`, `__add`
-(etc...). These operators will respectively look for `__index__`,
-`__newindex__`, `__tostring__`, `__add__` (etc...) in the
-metatable. If found, the corresponding function or value will be returned,
-else a Lua error will be raised.
-
-If one wants to provide `__index__` or `__newindex__` in the
-metaclass, these operators must follow a particular scheme:
-
- * `__index__` must either return a value _and_ `true` or return `false` only. In the first case, it means `__index__` was able to handle the given argument (for e.g., the type was correct). The second case means it was not able to do anything, so `__index` in the root metatable can then try to see if the metaclass contains the required value.
-
- * `__newindex__` must either return `true` or `false`. As for `__index__`, `true` means it could handle the argument and `false` not. If not, the root metatable `__newindex` will then raise an error if the object was a userdata, or apply a rawset if the object was a Lua table.
-
-Other metaclass operators like `__tostring__`, `__add__`, etc... do not have any particular constraint.
-
-<a name="luat_newlocalmetatable"></a>
-### const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx) ###
-
-This function creates a new metatable, which is the Lua way to define a new
-object class. As for `luaL_newmetatable`, the metatable is registered in
-the Lua registry table, with the key `tname`. In addition, `tname` is
-also registered in the Lua registry, with the metatable as key (the
-typename of a given object can be thus easily retrieved).
-
-The class name `tname` must be of the form `modulename.classname`. If not
-NULL, `parenttname` must be a valid typename corresponding to the parent
-class of the new class.
-
-If `constructor` is not NULL, a function `new` will be added to the
-metatable, pointing to this given function.
-
-A "constructor table" will be created by `luaT_newlocalmetatable`: it will
-contain all the class methods, and be callable, calling the `constructor`, if
-a `constructor` has been passed. The constructor table is either stored into
-`modulename.classname` (that is in the global namespace) if `moduleidx <=
-0` or in the table at index `moduleidx` in the stack (if `moduleidx > 0`).
-
-If not NULL, `destructor` will be called when garbage collecting the object.
-
-If not NULL, `factory` must be a Lua C function creating an empty object
-instance of the class. This functions are used in Torch for serialization.
-
-Note that classes can be partly defined in C and partly defined in Lua:
-once the metatable is created in C, it can be filled up with additional
-methods in Lua.
-
-The return value is the value returned by [luaT_typenameid](#luat_typenameid).
-
-<a name="luat_newmetatable"></a>
-### const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) ###
-
-Same as [luaT_newlocalmetatable](#luat_newmetatable), but where the
-constructor table is assigned in the global namespace (`moduleidx = 0`).
-
-<a name="luat_pushmetatable"></a>
-### int luaT_pushmetatable(lua_State *L, const name *tname) ###
-
-Push the metatable with type name `tname` on the stack, if `tname` is a
-valid Torch class name (previously registered with luaT_newmetatable).
-
-On success, returns 1. If `tname` is invalid, nothing is pushed and it
-returns 0.
-
-<a name="luat_typenameid"></a>
-### const char* luaT_typenameid(lua_State *L, const char *tname) ###
-
-If `tname` is a valid Torch class name, then returns a unique string (the
-contents will be the same as `tname`) pointing to the string registered
-in the Lua registry. This string is thus valid as long as Lua is
-running. The returned string shall not be freed.
-
-If `tname` is an invalid class name, returns NULL.
-
-<a name="luat_typename"></a>
-### const char* luaT_typename(lua_State *L, int ud) ###
-
-Returns the typename of the object at index `ud` on the stack. If it is
-not a valid Torch object, returns NULL.
-
-<a name="luat_pushudata"></a>
-### void luaT_pushudata(lua_State *L, void *udata, const char *tname) ###
-
-Given a C structure `udata`, push a userdata object on the stack with
-metatable corresponding to `tname`. Obviously, `tname` must be a valid
-Torch name registered with [luaT_newmetatable](#luat_newmetatable).
-
-<a name="luat_toudata"></a>
-### void *luaT_toudata(lua_State *L, int ud, const char *tname) ###
-
-Returns a pointer to the original C structure previously pushed on the
-stack with [luaT_pushudata](#luat_pushudata), if the object at index
-`ud` is a valid Torch class name. Returns NULL otherwise.
-
-<a name="luat_isudata"></a>
-### int luaT_isudata(lua_State *L, int ud, const char *tname) ###
-
-Returns 1 if the object at index `ud` on the stack is a valid Torch class name `tname`.
-Returns 0 otherwise.
-
-<a name="luat_getfield"></a>
-### Checking fields of a table ###
-
-This functions check that the table at the given index `ud` on the Lua
-stack has a field named `field`, and that it is of the specified type.
-These function raises a Lua error on failure.
-
-<a name="luat_getfieldcheckudata"></a>
-## void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) ##
-
-Checks that the field named `field` of the table at index `ud` is a
-Torch class name `tname`. Returns the pointer of the C structure
-previously pushed on the stack with [luaT_pushudata](#luat_pushudata) on
-success. The function raises a Lua error on failure.
-
-<a name="luat_getfieldchecklightudata"></a>
-## void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) ##
-
-Checks that the field named `field` of the table at index `ud` is a
-lightuserdata. Returns the lightuserdata pointer on success. The function
-raises a Lua error on failure.
-
-<a name="luat_getfieldcheckint"></a>
-## int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) ##
-
-Checks that the field named `field` of the table at index `ud` is an
-int. Returns the int value pointer on success. The function raises a Lua
-error on failure.
-
-<a name="luat_getfieldcheckstring"></a>
-## const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) ##
-
-Checks that the field named `field` of the table at index `ud` is a
-string. Returns a pointer to the string on success. The function raises a
-Lua error on failure.
-
-<a name="luat_getfieldcheckboolean"></a>
-## int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) ##
-
-Checks that the field named `field` of the table at index `ud` is a
-boolean. On success, returns 1 if the boolean is `true`, 0 if it is
-`false`. The function raises a Lua error on failure.
-
-<a name="luat_getfieldchecktable"></a>
-## void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) ##
-
-Checks that the field named `field` of the table at index `ud` is a
-table. On success, push the table on the stack. The function raises a Lua
-error on failure.
-
-<a name="luat_typerror"></a>
-### int luaT_typerror(lua_State *L, int ud, const char *tname) ###
-
-Raises a `luaL_argerror` (and returns its value), claiming that the
-object at index `ud` on the stack is not of type `tname`. Note that
-this function does not check the type, it only raises an error.
-
-<a name="luat_checkboolean"></a>
-### int luaT_checkboolean(lua_State *L, int ud) ###
-
-Checks that the value at index `ud` is a boolean. On success, returns 1
-if the boolean is `true`, 0 if it is `false`. The function raises a Lua
-error on failure.
-
-<a name="luat_optboolean"></a>
-### int luaT_optboolean(lua_State *L, int ud, int def) ###
-
-Checks that the value at index `ud` is a boolean. On success, returns 1
-if the boolean is `true`, 0 if it is `false`. If there is no value at
-index `ud`, returns `def`. In any other cases, raises an error.
-
-<a name="luat_registeratname"></a>
-### void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) ###
-
-This function assume a table is on the stack. It creates a table field
-`name` in the table (if this field does not exist yet), and fill up
-`methods` in this table field.
-
-<a name="luat_classrootname"></a>
-### const char *luaT_classrootname(const char *tname) ###
-
-Assuming `tname` is of the form `A.b.c`, returns 'c'. The returned value
-shall not be freed. It is a pointer inside `tname` string.
-
-<a name="luat_classmodulename"></a>
-### int luaT_classmodulename(const char *tname, char *parent_name) ###
-Alias to `luaT_fullparentname ` for ensuring backwards compatibility;
-use of `luaT_fullparentname` is preferred.
-
-<a name="luat_fullparentname"></a>
-### int luaT_fullparentname(const char *tname, char *parent_name) ###
-
-Returns a 0-1 valued integer indicating whether `tname` has a parent module.
-Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `A.b`.
-
-<a name="luat_classmodulename"></a>
-### int luaT_outerparentname(const char *tname, char *parent_name) ###
-
-Returns a 0-1 valued integer indicating whether `tname` has a parent module.
-Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `A`.
-
-<a name="luat_classmodulename"></a>
-### int luaT_innerparentname(const char *tname, char *parent_name) ###
-
-Returns a 0-1 valued integer indicating whether `tname` has a parent module.
-Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `b`.
-
-<a name="luat_stackdump"></a>
-### void luaT_stackdump(lua_State *L) ###
-
-This function print outs the state of the Lua stack. It is useful for debug
-purposes.
-
diff --git a/contrib/lua-torch/torch7/lib/luaT/luaT.c b/contrib/lua-torch/torch7/lib/luaT/luaT.c
deleted file mode 100644
index d87f5d54c..000000000
--- a/contrib/lua-torch/torch7/lib/luaT/luaT.c
+++ /dev/null
@@ -1,1373 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#include "luaT.h"
-
-void* luaT_alloc(lua_State *L, ptrdiff_t size)
-{
- void *ptr;
-
- if(size == 0)
- return NULL;
-
- if(size < 0)
- luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?");
-
- ptr = malloc(size);
- if(!ptr)
- luaL_error(L, "$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", size/1073741824);
-
- return ptr;
-}
-
-void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size)
-{
- if(!ptr)
- return(luaT_alloc(L, size));
-
- if(size == 0)
- {
- luaT_free(L, ptr);
- return NULL;
- }
-
- if(size < 0)
- luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?");
-
- ptr = realloc(ptr, size);
- if(!ptr)
- luaL_error(L, "$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824);
- return ptr;
-}
-
-void luaT_free(lua_State *L, void *ptr)
-{
- free(ptr);
-}
-
-void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
-{
-#if LUA_VERSION_NUM == 501
- luaL_checkstack(L, nup+1, "too many upvalues");
- for (; l->name != NULL; l++) { /* fill the table with given functions */
- int i;
- lua_pushstring(L, l->name);
- for (i = 0; i < nup; i++) /* copy upvalues to the top */
- lua_pushvalue(L, -(nup+1));
- lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */
- lua_settable(L, -(nup + 3));
- }
- lua_pop(L, nup); /* remove upvalues */
-#else
- luaL_setfuncs(L, l, nup);
-#endif
-}
-
-void luaT_stackdump(lua_State *L)
-{
- int i;
- const char *tname = NULL;
- int top = lua_gettop(L);
- for(i = 1; i <= top; i++)
- {
- int t = lua_type(L, i);
- printf("%3d. ", i);
- switch(t)
- {
- case LUA_TSTRING:
- printf("'%s'", lua_tostring(L,i));
- break;
- case LUA_TBOOLEAN:
- printf(lua_toboolean(L, i) ? "true" : "false");
- break;
- case LUA_TNUMBER:
- printf("%g", lua_tonumber(L,i));
- break;
- case LUA_TUSERDATA:
- tname = luaT_typename(L, i);
- printf("userdata %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object"));
- break;
- case 10:
- tname = luaT_typename(L, i);
- printf("cdata %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object"));
- break;
- case LUA_TTABLE:
- lua_pushvalue(L, i);
- lua_rawget(L, LUA_REGISTRYINDEX);
- if(lua_isstring(L, -1))
- tname = lua_tostring(L, -1); /*luaT_typenameid(L, lua_tostring(L, -1)); */
- else
- tname = NULL;
- lua_pop(L, 1);
- if(tname)
- printf("metatable [%s]", tname);
- else
- {
- tname = luaT_typename(L, i);
- printf("table %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object"));
- }
- break;
- default:
- printf("Lua object type: %s", lua_typename(L,t));
- break;
- }
- printf("\n");
- }
- printf("---------------------------------------------\n");
-}
-
-/* metatable operator methods */
-static int luaT_mt__index(lua_State *L);
-static int luaT_mt__newindex(lua_State *L);
-static int luaT_mt__tostring(lua_State *L);
-static int luaT_mt__add(lua_State *L);
-static int luaT_mt__sub(lua_State *L);
-static int luaT_mt__mul(lua_State *L);
-static int luaT_mt__div(lua_State *L);
-static int luaT_mt__mod(lua_State *L);
-static int luaT_mt__pow(lua_State *L);
-static int luaT_mt__unm(lua_State *L);
-static int luaT_mt__concat(lua_State *L);
-static int luaT_mt__len(lua_State *L);
-static int luaT_mt__eq(lua_State *L);
-static int luaT_mt__lt(lua_State *L);
-static int luaT_mt__le(lua_State *L);
-static int luaT_mt__call(lua_State *L);
-
-/* Constructor-metatable methods */
-static int luaT_cmt__call(lua_State *L);
-static int luaT_cmt__newindex(lua_State *L);
-
-const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parent_tname,
- lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory)
-{
- return luaT_newlocalmetatable(L, tname, parent_tname,
- constructor, destructor, factory, 0);
-}
-
-const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname,
- lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx)
-{
- lua_pushcfunction(L, luaT_lua_newmetatable);
- lua_pushstring(L, tname);
- (parent_tname ? (void)lua_pushstring(L, parent_tname) : lua_pushnil(L));
- (constructor ? lua_pushcfunction(L, constructor) : lua_pushnil(L));
- (destructor ? lua_pushcfunction(L, destructor) : lua_pushnil(L));
- (factory ? lua_pushcfunction(L, factory) : lua_pushnil(L));
- (moduleidx > 0 ? lua_pushvalue(L, moduleidx) : lua_pushnil(L));
- lua_call(L, 6, 1);
- return luaT_typenameid(L, tname);
-}
-
-int luaT_pushmetatable(lua_State *L, const char *tname)
-{
- lua_getfield(L, LUA_REGISTRYINDEX, tname);
- if(lua_isnil(L, -1))
- {
- lua_pop(L, 1);
- return 0;
- }
- return 1;
-}
-
-const char *luaT_typenameid(lua_State *L, const char *tname)
-{
- if(luaT_pushmetatable(L, tname))
- {
- const char *tnameid = NULL;
- lua_rawget(L, LUA_REGISTRYINDEX);
- if(lua_isstring(L, -1))
- tnameid = lua_tostring(L, -1);
- lua_pop(L, 1); /* the string/nil */
- return tnameid;
- }
- return NULL;
-}
-
-static const char cdataname[] = ""
- "local ok, ffi = pcall(require, 'ffi')\n"
- "if ok then\n"
- " local id2name = {}\n"
- " return function(cdata, name)\n"
- " local id\n"
- " if jit then\n"
- " id = tonumber(ffi.typeof(cdata))\n"
- " else\n"
- " id = tostring(ffi.typeof(cdata))\n"
- " end\n"
- " if id then\n"
- " if name then\n"
- " id2name[id] = name\n"
- " return name\n"
- " else\n"
- " return rawget(id2name, id)\n"
- " end\n"
- " end\n"
- " return nil\n"
- " end\n"
- "else\n"
- " return function() end\n"
- "end\n";
-
-static const char* luaT_cdataname(lua_State *L, int ud, const char *tname)
-{
- lua_pushstring(L, "__cdataname");
- lua_rawget(L, LUA_REGISTRYINDEX);
- if(lua_isnil(L,-1))
- {
- lua_pop(L, 1);
-
- if(luaL_dostring(L, cdataname)) /* did something go wrong? */
- luaL_error(L, "internal error (could not load cdataname): %s", lua_tostring(L, -1));
-
- lua_pushstring(L, "__cdataname");
- lua_pushvalue(L, -2);
- lua_rawset(L, LUA_REGISTRYINDEX);
- }
- if(!lua_isfunction(L, -1)) /* should not happen */
- luaL_error(L, "internal error (cdataname is not a function)");
-
- lua_pushvalue(L, ud);
- if(tname)
- lua_pushstring(L, tname);
- if(lua_pcall(L, (tname ? 2 : 1), 1, 0))
- luaL_error(L, "internal error (cdataname): %s", lua_tostring(L, -1));
-
- tname = lua_tostring(L, -1);
- lua_pop(L, 1);
-
- return tname;
-}
-
-static void* CDATA_MT_KEY = &CDATA_MT_KEY;
-static const char cdatamt[] = ""
- "local ok, ffi = pcall(require, 'ffi')\n"
- "if ok and not jit then\n"
- " return ffi.debug().cdata_mt\n"
- "else\n"
- " return {}\n"
- "end\n";
-
-static int luaT_iscdata(lua_State *L, int ud)
-{
- int type = lua_type(L, ud);
- if(type == 10)
- return 1;
- if(type != LUA_TUSERDATA)
- return 0;
- if(!lua_getmetatable(L, ud))
- return 0;
-
- lua_pushlightuserdata(L, CDATA_MT_KEY);
- lua_rawget(L, LUA_REGISTRYINDEX);
- if (lua_isnil(L, -1))
- {
- // initialize cdata metatable
- lua_pop(L, 1);
- if(luaL_dostring(L, cdatamt))
- luaL_error(L, "internal error (could not load cdata mt): %s", lua_tostring(L, -1));
-
- lua_pushlightuserdata(L, CDATA_MT_KEY);
- lua_pushvalue(L, -2);
- lua_rawset(L, LUA_REGISTRYINDEX);
- }
-
- int iscdata = lua_rawequal(L, -1, -2);
- lua_pop(L, 2);
- return iscdata;
-}
-
-const char* luaT_typename(lua_State *L, int ud)
-{
- if(luaT_iscdata(L, ud))
- return luaT_cdataname(L, ud, NULL);
- else if(lua_getmetatable(L, ud))
- {
- const char *tname = NULL;
- lua_rawget(L, LUA_REGISTRYINDEX);
- if(lua_isstring(L, -1))
- tname = lua_tostring(L, -1);
- lua_pop(L, 1); /* the string/nil */
- return tname;
- }
- return NULL;
-}
-
-void luaT_pushudata(lua_State *L, void *udata, const char *tname)
-{
- if(udata)
- {
- void **udata_p = lua_newuserdata(L, sizeof(void*));
- *udata_p = udata;
- if(!luaT_pushmetatable(L, tname))
- luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname);
- lua_setmetatable(L, -2);
- }
- else
- lua_pushnil(L);
-}
-
-void *luaT_toudata(lua_State *L, int ud, const char *tname)
-{
- void **p = lua_touserdata(L, ud);
- if(p != NULL) /* value is a userdata? */
- {
- if(!luaT_pushmetatable(L, tname))
- luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname);
-
- /* initialize the table we want to get the metatable on */
- /* note that we have to be careful with indices, as we just inserted stuff */
- lua_pushvalue(L, (ud < 0 ? ud - 1 : ud));
- while(lua_getmetatable(L, -1)) /* get the next metatable */
- {
- lua_remove(L, -2); /* remove the previous metatable [or object, if first time] */
- if(lua_rawequal(L, -1, -2))
- {
- lua_pop(L, 2); /* remove the two metatables */
- return *p;
- }
- }
- lua_pop(L, 2); /* remove the two metatables */
- }
- return NULL;
-}
-
-int luaT_isudata(lua_State *L, int ud, const char *tname)
-{
- if(luaT_toudata(L, ud, tname))
- return 1;
- else
- return 0;
-}
-
-void *luaT_checkudata(lua_State *L, int ud, const char *tname)
-{
- void *p = luaT_toudata(L, ud, tname);
- if(!p)
- luaT_typerror(L, ud, tname);
- return p;
-}
-
-void luaT_pushlong(lua_State *L, long n)
-{
-#if LUA_VERSION_NUM >= 503
- /* Only push the value as an integer if it fits in lua_Integer,
- or if the lua_Number representation will be even worse */
- if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) {
- lua_pushinteger(L, n);
- } else {
- lua_pushnumber(L, (lua_Number)n);
- }
-#else
- lua_pushnumber(L, (lua_Number)n);
-#endif
-}
-
-long luaT_checklong(lua_State *L, int idx)
-{
-#if LUA_VERSION_NUM >= 503
- if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) {
- return (long)luaL_checkinteger(L, idx);
- } else {
- return (long)luaL_checknumber(L, idx);
- }
-#else
- return (long)luaL_checknumber(L, idx);
-#endif
-}
-
-long luaT_tolong(lua_State *L, int idx)
-{
-#if LUA_VERSION_NUM == 503
- if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) {
- return (long)lua_tointeger(L, idx);
- } else {
- return (long)lua_tonumber(L, idx);
- }
-#else
- return (long)lua_tonumber(L, idx);
-#endif
-}
-
-void luaT_pushinteger(lua_State *L, ptrdiff_t n)
-{
-#if LUA_VERSION_NUM >= 503
- /* Only push the value as an integer if it fits in lua_Integer,
- or if the lua_Number representation will be even worse */
- if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) {
- lua_pushinteger(L, n);
- } else {
- lua_pushnumber(L, (lua_Number)n);
- }
-#else
- lua_pushnumber(L, (lua_Number)n);
-#endif
-}
-
-ptrdiff_t luaT_checkinteger(lua_State *L, int idx)
-{
-#if LUA_VERSION_NUM >= 503
- if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) {
- return (ptrdiff_t)luaL_checkinteger(L, idx);
- } else {
- return (ptrdiff_t)luaL_checknumber(L, idx);
- }
-#else
- return (ptrdiff_t)luaL_checknumber(L, idx);
-#endif
-}
-
-void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname)
-{
- void *p;
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
- p = luaT_toudata(L, -1, tname);
- if(!p)
- luaL_error(L, "bad argument #%d (field %s is not a %s)", ud, field, tname);
- return p;
-}
-
-void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field)
-{
- void *p;
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
-
- if(!lua_islightuserdata(L, -1))
- luaL_error(L, "bad argument #%d (field %s is not a light userdata)", ud, field);
-
- p = lua_touserdata(L, -1);
-
- return p;
-}
-
-double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field)
-{
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
- if(!lua_isnumber(L, -1))
- luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field);
- return lua_tonumber(L, -1);
-}
-
-int luaT_getfieldcheckint(lua_State *L, int ud, const char *field)
-{
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
- if(!lua_isnumber(L, -1))
- luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field);
- return (int)lua_tonumber(L, -1);
-}
-
-const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field)
-{
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
- if(!lua_isstring(L, -1))
- luaL_error(L, "bad argument #%d (field %s is not a string)", ud, field);
- return lua_tostring(L, -1);
-}
-
-int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field)
-{
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
- if(!lua_isboolean(L, -1))
- luaL_error(L, "bad argument #%d (field %s is not a boolean)", ud, field);
- return lua_toboolean(L, -1);
-}
-
-void luaT_getfieldchecktable(lua_State *L, int ud, const char *field)
-{
- lua_getfield(L, ud, field);
- if(lua_isnil(L, -1))
- luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field);
- if(!lua_istable(L, -1))
- luaL_error(L, "bad argument #%d (field %s is not a table)", ud, field);
-}
-
-/**** type checks as in luaL ****/
-int luaT_typerror(lua_State *L, int ud, const char *tname)
-{
- const char *msg;
- const char *tnameud = luaT_typename(L, ud);
-
- if(!tnameud)
- tnameud = lua_typename(L, ud);
-
- msg = lua_pushfstring(L, "%s expected, got %s",
- tname,
- (tnameud ? tnameud : "unknown object"));
-
- return luaL_argerror(L, ud, msg);
-}
-
-int luaT_checkboolean(lua_State *L, int ud)
-{
- if(!lua_isboolean(L, ud))
- luaT_typerror(L, ud, lua_typename(L, LUA_TBOOLEAN));
- return lua_toboolean(L, ud);
-}
-
-int luaT_optboolean(lua_State *L, int ud, int def)
-{
- if(lua_isnoneornil(L,ud))
- return def;
-
- return luaT_checkboolean(L, ud);
-}
-
-void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name)
-{
- int idx = lua_gettop(L);
-
- luaL_checktype(L, idx, LUA_TTABLE);
- lua_pushstring(L, name);
- lua_rawget(L, idx);
-
- if(lua_isnil(L, -1))
- {
- lua_pop(L, 1);
- lua_pushstring(L, name);
- lua_newtable(L);
- lua_rawset(L, idx);
-
- lua_pushstring(L, name);
- lua_rawget(L, idx);
- }
-
- luaT_setfuncs(L, methods, 0);
- lua_pop(L, 1);
-}
-
-
-/* returns the name of the class itself (sans nesting) */
-const char* luaT_classrootname(const char *tname)
-{
- int idx;
- int sz = strlen(tname);
-
- for(idx = sz-1; idx >= 0 ; idx--)
- {
- if(tname[idx] == '.')
- return tname+idx+1;
- }
- return tname;
-}
-
-/* parent_name must be a buffer at least as big as tname.
- * If class has a parent, returns true; and, sets
- * parent name to that of full parent hierarchy (e.g.
- * given class `A.b.c`, sets parent_name to `A.b`)
- */
-int luaT_fullparentname(const char *tname, char *parent_name)
-{
- int sz = strlen(tname);
- int idx;
- for(idx = sz-1; idx > 0 ; idx--)
- if(tname[idx] == '.' || tname[idx] == '\0') break;
-
- if (idx > 0) strncpy(parent_name, tname, idx);
- parent_name[idx] = '\0';
- return tname[idx] == '.';
-}
-
-/* alias for ensuring backwards compatibility;
- * use of luaT_fullparentname is preferred.
- */
-int luaT_classmodulename(const char *tname, char *parent_name)
-{
- return luaT_fullparentname(tname, parent_name);
-}
-
-/* parent_name must be a buffer at least as big as tname.
- * If class has a parent, returns true; and, sets
- * parent name to that of outermost parent (e.g.
- * given class `A.b.c`, sets parent_name to `A`)
- */
-int luaT_outerparentname(const char *tname, char *parent_name)
-{
- char chars[] = {'.', '\0'};
- size_t idx;
- idx = strcspn(tname, chars);
- strncpy(parent_name, tname, idx);
- parent_name[idx] = '\0';
- return tname[idx] == '.';
-}
-
-/* parent_name must be a buffer at least as big as tname.
- * If class has a parent, returns true; and, sets parent
- * name to that of innermost parent (e.g. given class
- * `A.b.c`, sets parent_name to `b`). In the comments
- * below, the inner parent name is abbreviated as IPN.
- */
-int luaT_innerparentname(const char *tname, char *parent_name)
-{
- int sz = strlen(tname);
- int tail, head;
- for(tail = sz-1; tail >= 0 ; tail--) // tail points to
- if(tname[tail] == '.') break; // just past IPN
-
- if (tail == 0) return 0;
-
- for(head = tail-1; head >= 0; head--) // head points to
- if(tname[head] == '.') break; // just before IPN
-
- head += 1; // update head to start of IPN
- tail -= head; // update tail to strlen(IPN)
- strncpy(parent_name, tname+head, tail);
- parent_name[tail] = '\0';
- return 1;
-}
-
-/* Method for pushing a class's immediate parent to the
- * stack (e.g. given class `A.b.c`, pushes `b` to the stack)
- */
-void luaT_getinnerparent(lua_State *L, const char *tname)
-{
- /* Local variables */
- char term[256];
- char chars[] = {'.', '\0'};
- const char *tname_full = tname; // used for error case
-
- /* Get outermost table from Lua */
- int n = strcspn(tname, chars);
- strncpy(term, tname, n);
- term[n] = '\0';
- lua_getglobal(L, term);
- tname += n + 1;
-
- /* Traverse hierarchy down to last table*/
- n = strcspn(tname, chars);
- while(n < strlen(tname))
- {
- /* Check that current parent is a table (i.e. a module) */
- if(!lua_istable(L, -1)){
- strncpy(term, tname_full, tname - tname_full - 1);
- term[tname - tname_full] = '\0';
- luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname_full, term);
- }
- strncpy(term, tname, n);
- term[n] = '\0';
- lua_getfield(L, -1, term);
- lua_remove(L, -2);
- tname += n + 1;
- n = strcspn(tname, chars); // prepare for next
- }
-
- /* Check that resulting parent is a table (i.e. a module) */
- if(!lua_istable(L, -1)){
- strncpy(term, tname_full, tname - tname_full - 1);
- term[tname - tname_full] = '\0';
- luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname_full, term);
- }
-}
-
-
-int luaT_lua_newmetatable(lua_State *L)
-{
- /* Local Variables */
- const char* tname = luaL_checkstring(L, 1);
- char parent_name[256];
- int is_in_module = 0;
-
- /* Argument Checking */
- lua_settop(L, 6);
- luaL_argcheck(L, lua_isnoneornil(L, 2) || lua_isstring(L, 2), 2, "parent class name or nil expected");
- luaL_argcheck(L, lua_isnoneornil(L, 3) || lua_isfunction(L, 3), 3, "constructor function or nil expected");
- luaL_argcheck(L, lua_isnoneornil(L, 4) || lua_isfunction(L, 4), 4, "destructor function or nil expected");
- luaL_argcheck(L, lua_isnoneornil(L, 5) || lua_isfunction(L, 5), 5, "factory function or nil expected");
- luaL_argcheck(L, lua_isnoneornil(L, 6) || lua_istable(L, 6), 6, "module table or nil expected");
-
- /* Push immediate parent module to stack */
- if(lua_isnoneornil(L, 6)) {
- lua_pop(L, 1); /* remove the nil */
- is_in_module = luaT_fullparentname(tname, parent_name);
- if (is_in_module)
- luaT_getinnerparent(L, tname);
- else
- lua_pushglobaltable(L);
- }
-
- if(!lua_istable(L, -1))
- luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname, parent_name);
-
- /* we first create the new metaclass if we have to */
- if(!luaT_pushmetatable(L, tname))
- {
- /* create the metatable */
- lua_newtable(L);
-
- /* registry[name] = metatable */
- lua_pushvalue(L, -1);
- lua_setfield(L, LUA_REGISTRYINDEX, tname);
-
- /* registry[metatable] = tname */
- lua_pushvalue(L, -1);
- lua_pushstring(L, tname);
- lua_rawset(L, LUA_REGISTRYINDEX);
-
- /* __index handling */
- lua_pushcfunction(L, luaT_mt__index);
- lua_setfield(L, -2, "__index");
-
- /* __newindex handling */
- lua_pushcfunction(L, luaT_mt__newindex);
- lua_setfield(L, -2, "__newindex");
-
- /* __typename contains the typename */
- lua_pushstring(L, tname);
- lua_setfield(L, -2, "__typename");
-
- /* __metatable is self */
- lua_pushvalue(L, -1);
- lua_setfield(L, -2, "__metatable");
-
- /* by default, __version equals 1 */
- lua_pushnumber(L, 1);
- lua_setfield(L, -2, "__version");
-
- /* assign default operator functions */
- lua_pushcfunction(L, luaT_mt__tostring);
- lua_setfield(L, -2, "__tostring");
-
- lua_pushcfunction(L, luaT_mt__add);
- lua_setfield(L, -2, "__add");
-
- lua_pushcfunction(L, luaT_mt__sub);
- lua_setfield(L, -2, "__sub");
-
- lua_pushcfunction(L, luaT_mt__mul);
- lua_setfield(L, -2, "__mul");
-
- lua_pushcfunction(L, luaT_mt__div);
- lua_setfield(L, -2, "__div");
-
- lua_pushcfunction(L, luaT_mt__mod);
- lua_setfield(L, -2, "__mod");
-
- lua_pushcfunction(L, luaT_mt__pow);
- lua_setfield(L, -2, "__pow");
-
- lua_pushcfunction(L, luaT_mt__unm);
- lua_setfield(L, -2, "__unm");
-
- lua_pushcfunction(L, luaT_mt__concat);
- lua_setfield(L, -2, "__concat");
-
- lua_pushcfunction(L, luaT_mt__len);
- lua_setfield(L, -2, "__len");
-
- lua_pushcfunction(L, luaT_mt__eq);
- lua_setfield(L, -2, "__eq");
-
- lua_pushcfunction(L, luaT_mt__lt);
- lua_setfield(L, -2, "__lt");
-
- lua_pushcfunction(L, luaT_mt__le);
- lua_setfield(L, -2, "__le");
-
- lua_pushcfunction(L, luaT_mt__call);
- lua_setfield(L, -2, "__call");
- }
-
- /* we assign the parent class if necessary */
- if(!lua_isnoneornil(L, 2))
- {
- if(lua_getmetatable(L, -1))
- luaL_error(L, "class %s has been already assigned a parent class\n", tname);
- else
- {
- const char* parent_tname = luaL_checkstring(L, 2);
- if(!luaT_pushmetatable(L, parent_tname))
- luaL_error(L, "bad argument #2 (invalid parent class name %s)", parent_tname);
- lua_setmetatable(L, -2);
- }
- }
-
- /* register the destructor function */
- if(!lua_isnoneornil(L, 4))
- {
- /* does it exists already? */
- lua_pushstring(L, "__gc");
- lua_rawget(L, -2);
-
- if(lua_isnil(L, -1))
- {
- lua_pop(L, 1); /* pop nil */
- lua_pushstring(L, "__gc");
- lua_pushvalue(L, 4);
- lua_rawset(L, -3);
- }
- else
- luaL_error(L, "%s has been already assigned a destructor", tname);
- }
-
- /* register the factory function */
- if(!lua_isnoneornil(L, 5))
- {
- /* does it exists already? */
- lua_pushstring(L, "__factory");
- lua_rawget(L, -2);
-
- if(lua_isnil(L, -1))
- {
- lua_pop(L, 1); /* pop nil */
- lua_pushstring(L, "__factory");
- lua_pushvalue(L, 5);
- lua_rawset(L, -3);
- }
- else
- luaL_error(L, "%s has been already assigned a factory", tname);
- }
-
- /******** Constructor table and metatable ********/
- lua_pushstring(L, "__constructor");
- lua_rawget(L, -2);
- if(lua_isnil(L, -1))
- {
- lua_pop(L, 1); /* pop nil */
- lua_newtable(L); /* fancy table */
- lua_newtable(L); /* fancy metatable */
-
- lua_pushvalue(L, -3); /* metatable */
- lua_setfield(L, -2, "__index"); /* so we can get the methods */
-
- lua_pushcfunction(L, luaT_cmt__newindex);
- lua_setfield(L, -2, "__newindex"); /* so we add new methods */
-
- lua_pushcfunction(L, luaT_cmt__call);
- lua_setfield(L, -2, "__call"); /* so we can create, we are here for only that */
-
- lua_pushvalue(L, -3);
- lua_setfield(L, -2, "__metatable"); /* redirect to metatable with methods */
-
- lua_setmetatable(L, -2); /* constructor metatable is ... this fancy metatable */
-
- /* set metatable[__constructor] = constructor-metatable */
- lua_pushstring(L, "__constructor");
- lua_pushvalue(L, -2);
- lua_rawset(L, -4);
- }
-
- /* register the constructor function */
- if(!lua_isnoneornil(L, 3))
- {
- /* get constructor metatable */
- lua_getmetatable(L, -1);
-
- /* does it exists already? */
- lua_pushstring(L, "__new");
- lua_rawget(L, -2);
-
- if(lua_isnil(L, -1))
- {
- lua_pop(L, 1); /* pop nil */
- lua_pushstring(L, "__new");
- lua_pushvalue(L, 3);
- lua_rawset(L, -3);
-
- /* set "new" in the metatable too */
- lua_pushstring(L, "new");
- lua_pushvalue(L, 3);
- lua_rawset(L, -5);
- }
- else
- luaL_error(L, "%s has been already assigned a constructor", tname);
-
- /* pop constructor metatable */
- lua_pop(L, 1);
- }
-
- /* module.name = constructor metatable */
- lua_setfield(L, 6, luaT_classrootname(tname));
-
- return 1; /* returns the metatable */
-}
-
-/* Lua only utility functions */
-
-/* add any custom type, provided the object has a metatable */
-int luaT_lua_metatype(lua_State *L)
-{
- if( (lua_gettop(L) != 2) && (lua_gettop(L) != 3) )
- luaL_error(L, "expecting: string table [ctype]");
-
- luaL_checkstring(L, 1);
- luaL_checktype(L, 2, LUA_TTABLE);
-
- if(lua_gettop(L) == 3)
- {
- if(!luaT_cdataname(L, 3, lua_tostring(L, 1)))
- luaL_error(L, "could not register cdata type -- missing ffi library?");
- }
-
- /* registry[name] = metatable */
- lua_pushvalue(L, 1);
- lua_pushvalue(L, 2);
- lua_rawset(L, LUA_REGISTRYINDEX);
-
- /* registry[metatable] = tname */
- lua_pushvalue(L, 2);
- lua_pushvalue(L, 1);
- lua_rawset(L, LUA_REGISTRYINDEX);
-
- return 0;
-}
-
-/* return a userdata from a C pointer */
-/* you are better to know what you are doing */
-int luaT_lua_pushudata(lua_State *L)
-{
- void *udata = NULL;
- const char *tname = luaL_checkstring(L, 2);
-
- if(lua_type(L, 1) == 10)
- udata = *((void**)lua_topointer(L, 1));
- else if(luaT_iscdata(L, 1))
- udata = ((void**)lua_topointer(L, 1))[4];
- else if(lua_isnumber(L, 1))
- udata = (void*)(uintptr_t)lua_tonumber(L, 1);
- else
- luaL_argerror(L, 1, "expecting number or cdata");
-
- luaT_pushudata(L, udata, tname);
-
- return 1;
-}
-
-int luaT_lua_factory(lua_State *L)
-{
- const char* tname = luaL_checkstring(L, 1);
- if(luaT_pushmetatable(L, tname) && !lua_isnil(L, -1))
- {
- lua_pushstring(L, "__factory");
- lua_rawget(L, -2);
- }
- else
- {
- lua_pushnil(L);
- }
- return 1;
-}
-
-int luaT_lua_getconstructortable(lua_State *L)
-{
- const char* tname = luaL_checkstring(L, 1);
- if(luaT_pushmetatable(L, tname))
- {
- lua_pushstring(L, "__constructor");
- lua_rawget(L, -2);
- return 1;
- }
- return 0;
-}
-
-
-int luaT_lua_typename(lua_State *L)
-{
- const char* tname = NULL;
- luaL_checkany(L, 1);
- if((tname = luaT_typename(L, 1)))
- {
- lua_pushstring(L, tname);
- return 1;
- }
- return 0;
-}
-
-int luaT_lua_isequal(lua_State *L)
-{
- if(lua_isuserdata(L, 1) && lua_isuserdata(L, 2))
- {
- void **u1, **u2;
- luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected");
- luaL_argcheck(L, luaT_typename(L, 2), 2, "Torch object expected");
-
- u1 = lua_touserdata(L, 1);
- u2 = lua_touserdata(L, 2);
- if(*u1 == *u2)
- lua_pushboolean(L, 1);
- else
- lua_pushboolean(L, 0);
- }
- else if(lua_istable(L, 1) && lua_istable(L, 2))
- lua_pushboolean(L, lua_rawequal(L, 1, 2));
- else
- lua_pushboolean(L, 0);
- return 1;
-}
-
-static void luaT_pushpointer(lua_State *L, const void *ptr)
-{
-#if LUA_VERSION_NUM >= 503
- // this assumes that lua_Integer is a ptrdiff_t
- if (sizeof(void *) > sizeof(lua_Integer))
- luaL_error(L, "Pointer value can't be represented as a Lua integer (an overflow would occur)");
- lua_pushinteger(L, (uintptr_t)(ptr));
-#else
- // 2^53 - this assumes that lua_Number is a double
- if ((uintptr_t)ptr > 9007199254740992LLU)
- luaL_error(L, "Pointer value can't be represented as a Lua number (an overflow would occur)");
- lua_pushnumber(L, (uintptr_t)(ptr));
-#endif
-}
-
-int luaT_lua_pointer(lua_State *L)
-{
- if(lua_type(L, 1) == 10) /* luajit cdata */
- {
- /* we want the pointer holded by cdata */
- /* not the pointer on the cdata object */
- const void* ptr = *((void**)lua_topointer(L, 1));
- luaT_pushpointer(L, ptr);
- return 1;
- }
- else if (luaT_iscdata(L, 1)) /* luaffi cdata */
- {
- void** ptr = (void**)lua_touserdata(L, 1);
- luaT_pushpointer(L, ptr[4]);
- return 1;
- }
- else if(lua_isuserdata(L, 1))
- {
- void **ptr;
- luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected");
- ptr = lua_touserdata(L, 1);
- luaT_pushpointer(L, *ptr);
- return 1;
- }
- else if(lua_istable(L, 1) || lua_isthread(L, 1) || lua_isfunction(L, 1))
- {
- const void* ptr = lua_topointer(L, 1);
- luaT_pushpointer(L, ptr);
- return 1;
- }
- else if(lua_isstring(L, 1))
- {
- const char* ptr = lua_tostring(L, 1);
- luaT_pushpointer(L, ptr);
- return 1;
- }
- else
- luaL_error(L, "Torch object, table, thread, cdata or function expected");
-
- return 0;
-}
-
-int luaT_lua_setenv(lua_State *L)
-{
- if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1))
- luaL_typerror(L, 1, "function or userdata");
- luaL_checktype(L, 2, LUA_TTABLE);
- lua_setuservalue(L, 1);
- return 0;
-}
-
-int luaT_lua_getenv(lua_State *L)
-{
- if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1))
- luaL_typerror(L, 1, "function or userdata");
- lua_getuservalue(L, 1);
- if (lua_isnil(L, -1))
- lua_newtable(L);
- return 1;
-}
-
-int luaT_lua_getmetatable(lua_State *L)
-{
- const char *tname = luaL_checkstring(L, 1);
- if(luaT_pushmetatable(L, tname))
- return 1;
- return 0;
-}
-
-int luaT_lua_version(lua_State *L)
-{
- luaL_checkany(L, 1);
-
- if(luaT_iscdata(L, 1))
- {
- const char *tname = luaT_cdataname(L, 1, NULL);
- if(tname)
- {
- luaT_pushmetatable(L, tname);
- lua_pushstring(L, "__version");
- lua_rawget(L, -2);
- return 1;
- }
- return 0;
- }
- else if(lua_getmetatable(L, 1))
- {
- lua_pushstring(L, "__version");
- lua_rawget(L, -2);
- return 1;
- }
- return 0;
-}
-
-int luaT_lua_setmetatable(lua_State *L)
-{
- const char *tname = luaL_checkstring(L, 2);
- luaL_checktype(L, 1, LUA_TTABLE);
-
- if(!luaT_pushmetatable(L, tname))
- luaL_error(L, "unknown typename %s\n", tname);
- lua_setmetatable(L, 1);
-
- return 1;
-}
-
-/* metatable operator methods */
-static int luaT_mt__index(lua_State *L)
-{
- if(!lua_getmetatable(L, 1))
- luaL_error(L, "critical internal indexing error: no metatable found");
-
- if(!lua_istable(L, -1))
- luaL_error(L, "critical internal indexing error: not a metatable");
-
- /* test for __index__ method first */
- lua_getfield(L, -1, "__index__");
- if(!lua_isnil(L, -1))
- {
- int result;
-
- if(!lua_isfunction(L, -1))
- luaL_error(L, "critical internal indexing error: __index__ is not a function");
-
- lua_pushvalue(L, 1);
- lua_pushvalue(L, 2);
-
- lua_call(L, 2, LUA_MULTRET); /* DEBUG: risque: faut vraiment retourner 1 ou 2 valeurs... */
-
- result = lua_toboolean(L, -1);
- lua_pop(L, 1);
-
- if(result)
- return 1;
-
- /* on the stack: 1. the object 2. the value 3. the metatable */
- /* apparently, __index wants only one element returned */
- /* return lua_gettop(L)-3; */
-
- }
- else
- lua_pop(L, 1); /* remove nil __index__ on the stack */
-
- lua_pushvalue(L, 2);
- lua_gettable(L, -2);
-
- return 1;
-}
-
-static int luaT_mt__newindex(lua_State *L)
-{
- if(!lua_getmetatable(L, 1))
- luaL_error(L, "critical internal indexing error: no metatable found");
-
- if(!lua_istable(L, -1))
- luaL_error(L, "critical internal indexing error: not a metatable");
-
- /* test for __newindex__ method first */
- lua_getfield(L, -1, "__newindex__");
- if(!lua_isnil(L, -1))
- {
- int result;
-
- if(!lua_isfunction(L, -1))
- luaL_error(L, "critical internal indexing error: __newindex__ is not a function");
-
- lua_pushvalue(L, 1);
- lua_pushvalue(L, 2);
- lua_pushvalue(L, 3);
-
- lua_call(L, 3, 1); /* DEBUG: risque: faut vraiment retourner qqch */
-
- result = lua_toboolean(L, -1);
- lua_pop(L, 1);
-
- if(result)
- return 0;
- }
- else
- lua_pop(L, 1); /* remove nil __newindex__ on the stack */
-
- lua_pop(L, 1); /* pop the metatable */
- if(lua_istable(L, 1))
- lua_rawset(L, 1);
- else
- luaL_error(L, "the class %s cannot be indexed", luaT_typename(L, 1));
-
- return 0;
-}
-
-
-#define MT_UNI_OPERATOR_GET_HANDLER(NAME) \
- if(!lua_getmetatable(L, 1)) \
- luaL_error(L, "internal error in __" #NAME ": no metatable");
-
-#define MT_BIN_OPERATOR_GET_HANDLER(NAME) \
- if(!lua_getmetatable(L, 1) && !lua_getmetatable(L,2) ) \
- luaL_error(L, "internal error in __" #NAME \
- ": no metatable in both operands");
-
-#define MT_DECLARE_OPERATOR_BODY(NAME, NIL_BEHAVIOR) \
- \
- lua_getfield(L, -1, "__" #NAME "__"); \
- if(lua_isnil(L, -1)) \
- { \
- NIL_BEHAVIOR; \
- } \
- else \
- { \
- if(lua_isfunction(L, -1)) \
- { \
- lua_insert(L, 1); /* insert function */ \
- lua_pop(L, 1); /* remove metatable */ \
- lua_call(L, lua_gettop(L)-1, LUA_MULTRET); \
- /* we return the result of the call */ \
- return lua_gettop(L); \
- } \
- /* we return the thing the user left in __tostring__ */ \
- } \
- return 0; \
-
-/* note: check dans metatable pour ca, donc necessaire */
-#define MT_DECLARE_OPERATOR(NAME, NIL_BEHAVIOR) \
- int luaT_mt__##NAME(lua_State *L) \
- { \
- MT_UNI_OPERATOR_GET_HANDLER(NAME) \
- MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR) \
- }
-
-#define MT_DECLARE_BIN_OPERATOR(NAME, NIL_BEHAVIOR) \
- int luaT_mt__##NAME(lua_State *L) \
- { \
- MT_BIN_OPERATOR_GET_HANDLER(NAME) \
- MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR) \
- }
-
-
-#define BIN_OPERATOR_ERROR(NAME) \
- luaL_error(L, "both %s and %s have no " #NAME " operator", \
- luaT_typename(L, 1), luaT_typename(L,2))
-
-MT_DECLARE_BIN_OPERATOR(add, BIN_OPERATOR_ERROR(addition) )
-MT_DECLARE_BIN_OPERATOR(sub, BIN_OPERATOR_ERROR(substraction) )
-MT_DECLARE_BIN_OPERATOR(mul, BIN_OPERATOR_ERROR(multiplication) )
-MT_DECLARE_BIN_OPERATOR(div, BIN_OPERATOR_ERROR(division) )
-MT_DECLARE_BIN_OPERATOR(mod, BIN_OPERATOR_ERROR(modulo) )
-MT_DECLARE_BIN_OPERATOR(pow, BIN_OPERATOR_ERROR(power) )
-MT_DECLARE_BIN_OPERATOR(concat, BIN_OPERATOR_ERROR(concat) )
-MT_DECLARE_BIN_OPERATOR(eq,
- lua_settop(L, 2);
- lua_pushcfunction(L, luaT_lua_isequal);
- lua_insert(L, 1);
- lua_call(L, 2, 1);
- return 1;)
-MT_DECLARE_BIN_OPERATOR(lt, BIN_OPERATOR_ERROR(less-than) )
-MT_DECLARE_BIN_OPERATOR(le, BIN_OPERATOR_ERROR(less-equal) )
-
-MT_DECLARE_OPERATOR(tostring,
- lua_pushstring(L, luaT_typename(L, 1));
- return 1;)
-MT_DECLARE_OPERATOR(call, luaL_error(L, "%s has no call operator", luaT_typename(L, 1)))
-MT_DECLARE_OPERATOR(unm, luaL_error(L, "%s has no negation operator", luaT_typename(L, 1)))
-MT_DECLARE_OPERATOR(len, luaL_error(L, "%s has no length operator", luaT_typename(L, 1)))
-
-
-/* constructor metatable methods */
-int luaT_cmt__call(lua_State *L)
-{
- if(!lua_istable(L, 1))
- luaL_error(L, "internal error in __call: not a constructor table");
-
- if(!lua_getmetatable(L, 1))
- luaL_error(L, "internal error in __call: no metatable available");
-
- lua_pushstring(L, "__new");
- lua_rawget(L, -2);
-
- if(lua_isnil(L, -1))
- luaL_error(L, "no constructor available");
-
- lua_remove(L, 1); /* remove constructor atable */
- lua_insert(L, 1); /* insert constructor */
- lua_pop(L, 1); /* remove fancy metatable */
-
- lua_call(L, lua_gettop(L)-1, LUA_MULTRET);
- return lua_gettop(L);
-}
-
-int luaT_cmt__newindex(lua_State *L)
-{
- if(!lua_istable(L, 1))
- luaL_error(L, "internal error in __newindex: not a constructor table");
-
- if(!lua_getmetatable(L, 1))
- luaL_error(L, "internal error in __newindex: no metatable available");
-
- lua_pushstring(L, "__metatable");
- lua_rawget(L, -2);
-
- if(!lua_istable(L, -1))
- luaL_error(L, "internal error in __newindex: no metaclass available");
-
- lua_insert(L, 2);
- lua_pop(L, 1); /* remove the metatable over the constructor table */
-
- lua_rawset(L, -3);
-
- return 0;
-}
-
-/******************** deprecated functions ********************/
-int luaT_pushmetaclass(lua_State *L, const char *tname)
-{
- return luaT_pushmetatable(L, tname);
-}
-
-const char* luaT_id(lua_State *L, int ud)
-{
- return luaT_typename(L, ud);
-}
-
-const char* luaT_id2typename(lua_State *L, const char *id)
-{
- return id;
-}
-
-const char* luaT_typename2id(lua_State *L, const char *tname)
-{
- return luaT_typenameid(L, tname);
-}
-
-int luaT_getmetaclass(lua_State *L, int index)
-{
- return lua_getmetatable(L, index);
-}
-
-const char* luaT_checktypename2id(lua_State *L, const char *tname)
-{
- const char* id = luaT_typenameid(L, tname);
- if(!id)
- luaL_error(L, "unknown class <%s>", tname);
- return id;
-}
-
-void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id)
-{
- luaT_registeratname(L, methods, id);
-}
-
-/**************************************************************/
diff --git a/contrib/lua-torch/torch7/lib/luaT/luaT.h b/contrib/lua-torch/torch7/lib/luaT/luaT.h
deleted file mode 100644
index 2479a1dc1..000000000
--- a/contrib/lua-torch/torch7/lib/luaT/luaT.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef LUAT_UTILS_INC
-#define LUAT_UTILS_INC
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#include <lua.h>
-#include <lauxlib.h>
-#ifdef __cplusplus
-}
-#endif
-
-#ifndef LUA_EXTERNC
-# ifdef __cplusplus
-# define LUA_EXTERNC extern "C"
-# else
-# define LUA_EXTERNC extern
-# endif
-#endif
-
-#if (defined(_MSC_VER) || defined(__MINGW32__))
-# define DLL_EXPORT __declspec(dllexport)
-# define DLL_IMPORT __declspec(dllimport)
-# ifdef luaT_EXPORTS
-# define LUAT_API LUA_EXTERNC DLL_EXPORT
-# else
-# define LUAT_API LUA_EXTERNC DLL_IMPORT
-# endif
-#else
-# define DLL_EXPORT
-# define DLL_IMPORT
-# define LUAT_API LUA_EXTERNC
-#endif
-
-#if LUA_VERSION_NUM == 501
-# define lua_pushglobaltable(L) lua_pushvalue(L, LUA_GLOBALSINDEX)
-# define lua_setuservalue lua_setfenv
-# define lua_getuservalue lua_getfenv
-#else
-# define lua_objlen lua_rawlen
-static int luaL_typerror(lua_State *L, int narg, const char *tname)
-{
- return luaL_error(L, "%s expected, got %s", tname, luaL_typename(L, narg));
-}
-#endif
-
-
-/* C functions */
-
-LUAT_API void* luaT_alloc(lua_State *L, ptrdiff_t size);
-LUAT_API void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size);
-LUAT_API void luaT_free(lua_State *L, void *ptr);
-
-LUAT_API void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup);
-
-LUAT_API const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname,
- lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx);
-
-LUAT_API const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname,
- lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory);
-
-LUAT_API int luaT_pushmetatable(lua_State *L, const char *tname);
-
-LUAT_API const char* luaT_typenameid(lua_State *L, const char *tname);
-LUAT_API const char* luaT_typename(lua_State *L, int ud);
-
-LUAT_API void luaT_pushudata(lua_State *L, void *udata, const char *tname);
-LUAT_API void *luaT_toudata(lua_State *L, int ud, const char *tname);
-LUAT_API int luaT_isudata(lua_State *L, int ud, const char *tname);
-LUAT_API void *luaT_checkudata(lua_State *L, int ud, const char *tname);
-
-LUAT_API void luaT_pushlong(lua_State *L, long n);
-LUAT_API long luaT_checklong(lua_State *L, int idx);
-LUAT_API long luaT_tolong(lua_State *L, int idx);
-
-LUAT_API void luaT_pushinteger(lua_State *L, ptrdiff_t n);
-LUAT_API ptrdiff_t luaT_checkinteger(lua_State *L, int idx);
-
-LUAT_API void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname);
-LUAT_API void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field);
-LUAT_API double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field);
-LUAT_API int luaT_getfieldcheckint(lua_State *L, int ud, const char *field);
-LUAT_API const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field);
-LUAT_API int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field);
-LUAT_API void luaT_getfieldchecktable(lua_State *L, int ud, const char *field);
-
-LUAT_API int luaT_typerror(lua_State *L, int ud, const char *tname);
-
-LUAT_API int luaT_checkboolean(lua_State *L, int ud);
-LUAT_API int luaT_optboolean(lua_State *L, int ud, int def);
-
-LUAT_API void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name);
-
-/* utility functions */
-LUAT_API const char *luaT_classrootname(const char *tname);
-LUAT_API int luaT_classmodulename(const char *tname, char *module_name);
-
-/* debug */
-LUAT_API void luaT_stackdump(lua_State *L);
-
-/* Lua functions */
-LUAT_API int luaT_lua_newmetatable(lua_State *L);
-LUAT_API int luaT_lua_factory(lua_State *L);
-LUAT_API int luaT_lua_getconstructortable(lua_State *L);
-LUAT_API int luaT_lua_typename(lua_State *L);
-LUAT_API int luaT_lua_isequal(lua_State *L);
-LUAT_API int luaT_lua_pointer(lua_State *L);
-LUAT_API int luaT_lua_setenv(lua_State *L);
-LUAT_API int luaT_lua_getenv(lua_State *L);
-LUAT_API int luaT_lua_getmetatable(lua_State *L);
-LUAT_API int luaT_lua_version(lua_State *L);
-LUAT_API int luaT_lua_setmetatable(lua_State *L);
-LUAT_API int luaT_lua_metatype(lua_State *L);
-LUAT_API int luaT_lua_pushudata(lua_State *L);
-
-/* deprecated functions */
-/* ids have been replaced by string names to identify classes */
-/* comments show what function (that you should use) they call now */
-#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
-#define LUAT_DEPRECATED __attribute__((__deprecated__))
-#elif (defined(_MSC_VER) || defined(__MINGW32__))
-#define LUAT_DEPRECATED __declspec(deprecated)
-#else
-#define LUAT_DEPRECATED
-#endif
-
-LUAT_API LUAT_DEPRECATED int luaT_pushmetaclass(lua_State *L, const char *tname); /* same as luaT_pushmetatable */
-LUAT_API LUAT_DEPRECATED const char* luaT_id(lua_State *L, int ud); /* same as luaT_typename */
-LUAT_API LUAT_DEPRECATED const char* luaT_id2typename(lua_State *L, const char *id); /* same as luaT_typenameid */
-LUAT_API LUAT_DEPRECATED const char* luaT_typename2id(lua_State *L, const char*); /* same as luaT_typenameid */
-LUAT_API LUAT_DEPRECATED int luaT_getmetaclass(lua_State *L, int index); /* same as luaT_getmetatable */
-LUAT_API LUAT_DEPRECATED const char* luaT_checktypename2id(lua_State *L, const char *tname); /* same as luaT_typenameid */
-LUAT_API LUAT_DEPRECATED void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id); /* same as luaT_registeratname */
-
-#endif
diff --git a/contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in b/contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in
deleted file mode 100644
index bfb20b87a..000000000
--- a/contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in
+++ /dev/null
@@ -1,9 +0,0 @@
-# Find the luaT includes and library
-#
-# LUAT_INCLUDE_DIR -- where to find the includes
-# LUAT_LIBRARIES -- list of libraries to link against
-# LUAT_FOUND -- set to 1 if found
-
-SET(LUAT_FOUND 1)
-SET(LUAT_INCLUDE_DIR "@LUAT_INCLUDE_DIR@")
-SET(LUAT_LIBRARIES "@LUAT_LIBRARIES@")