diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-01 15:13:04 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2019-07-01 15:13:04 +0100 |
commit | 891b250b452f8e1963a99931f241ac75e34d0281 (patch) | |
tree | ab56b822aca3cc6d02a3c9afbe8ca2f6d1c0381f /contrib/lua-torch/torch7/lib | |
parent | 38691d998d019ac0fba95720c337e3f9badf55c4 (diff) | |
download | rspamd-891b250b452f8e1963a99931f241ac75e34d0281.tar.gz rspamd-891b250b452f8e1963a99931f241ac75e34d0281.zip |
[Project] Remove torch
Diffstat (limited to 'contrib/lua-torch/torch7/lib')
97 files changed, 0 insertions, 22432 deletions
diff --git a/contrib/lua-torch/torch7/lib/CMakeLists.txt b/contrib/lua-torch/torch7/lib/CMakeLists.txt deleted file mode 100644 index d6a0e2c9c..000000000 --- a/contrib/lua-torch/torch7/lib/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -SET(TH_INSTALL_BIN_SUBDIR "${BINDIR}") -SET(TH_INSTALL_LIB_SUBDIR "${RSPAMD_LIBDIR}") -SET(TH_INSTALL_INCLUDE_SUBDIR "${Torch_INSTALL_INCLUDE_SUBDIR}") -SET(TH_INSTALL_CMAKE_SUBDIR "${Torch_INSTALL_CMAKE_SUBDIR}") - -ADD_SUBDIRECTORY(TH) -ADD_SUBDIRECTORY(luaT) diff --git a/contrib/lua-torch/torch7/lib/TH/CMakeLists.txt b/contrib/lua-torch/torch7/lib/TH/CMakeLists.txt deleted file mode 100644 index f7e0bf9bb..000000000 --- a/contrib/lua-torch/torch7/lib/TH/CMakeLists.txt +++ /dev/null @@ -1,296 +0,0 @@ -cmake_minimum_required(VERSION 2.6) - -# avoid some cmake warnings - -LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -SET(CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ ${CMAKE_LIBRARY_PATH}) - -####################################################################### -##### flags section -###################################################################### - -IF(MSVC) - # MSVC now supports C99 since VS2013/VS2015, however the standard version switch is not provided yet - # SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /std:c99") -ELSE(MSVC) - # enable gnu99 and not c99 because we use - # gnu extensions like posix_memalign - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99") -ENDIF(MSVC) - -IF(MSVC) - ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) # respect the standard -ENDIF(MSVC) -SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") -IF(UNIX) - # prevent Unknown CMake command "check_function_exists". - INCLUDE(CheckFunctionExists) -ENDIF(UNIX) - -# OpenMP support? 
- -IF (WITH_OPENMP) - FIND_PACKAGE(OpenMP) - IF(OPENMP_FOUND) - MESSAGE(STATUS "Compiling with OpenMP support") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") - ENDIF(OPENMP_FOUND) -ENDIF (WITH_OPENMP) - -# ARM specific flags -FIND_PACKAGE(ARM) -IF (ASIMD_FOUND) - MESSAGE(STATUS "asimd/Neon found with compiler flag : -D__NEON__") - SET(CMAKE_C_FLAGS "-D__NEON__ ${CMAKE_C_FLAGS}") -ELSEIF (NEON_FOUND) - MESSAGE(STATUS "Neon found with compiler flag : -mfpu=neon -D__NEON__") - SET(CMAKE_C_FLAGS "-mfpu=neon -D__NEON__ ${CMAKE_C_FLAGS}") -ENDIF (ASIMD_FOUND) -IF (CORTEXA8_FOUND) - MESSAGE(STATUS "Cortex-A8 Found with compiler flag : -mcpu=cortex-a8") - SET(CMAKE_C_FLAGS "-mcpu=cortex-a8 -fprefetch-loop-arrays ${CMAKE_C_FLAGS}") -ENDIF (CORTEXA8_FOUND) -IF (CORTEXA9_FOUND) - MESSAGE(STATUS "Cortex-A9 Found with compiler flag : -mcpu=cortex-a9") - SET(CMAKE_C_FLAGS "-mcpu=cortex-a9 ${CMAKE_C_FLAGS}") -ENDIF (CORTEXA9_FOUND) - -INCLUDE (CheckIncludeFile) -INCLUDE (CheckCSourceCompiles) -CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) -# Check for a cpuid intrinsic -IF(HAVE_CPUID_H) - CHECK_C_SOURCE_COMPILES("#include <cpuid.h> - int main() - { - unsigned int eax, ebx, ecx, edx; - return __get_cpuid(0, &eax, &ebx, &ecx, &edx); - }" HAVE_GCC_GET_CPUID) -ENDIF() -IF(HAVE_GCC_GET_CPUID) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID") -ENDIF(HAVE_GCC_GET_CPUID) - -CHECK_C_SOURCE_COMPILES("#include <stdint.h> - static inline void cpuid(uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) - { - uint32_t a = *eax, b, c = *ecx, d; - asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) ); - *eax = a; *ebx = b; *ecx = c; *edx = d; - } - int main() { - uint32_t a,b,c,d; - cpuid(&a, &b, &c, &d); - return 0; - }" NO_GCC_EBX_FPIC_BUG) - -IF(NOT NO_GCC_EBX_FPIC_BUG) - SET(CMAKE_C_FLAGS 
"${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID") -ENDIF(NOT NO_GCC_EBX_FPIC_BUG) - - -FIND_PACKAGE(SSE) # checks SSE, AVX and AVX2 -IF(C_SSE2_FOUND) - MESSAGE(STATUS "SSE2 Found") - SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}") -ENDIF(C_SSE2_FOUND) -IF(C_SSE3_FOUND) - MESSAGE(STATUS "SSE3 Found") - SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}") -ENDIF(C_SSE3_FOUND) -# we dont set -mavx and -mavx2 flags globally, but only for specific files -# however, we want to enable the AVX codepaths, so we still need to -# add USE_AVX and USE_AVX2 macro defines -IF(FALSE) -IF(C_AVX_FOUND) - MESSAGE(STATUS "AVX Found") - SET(CMAKE_C_FLAGS "-DUSE_AVX ${CMAKE_C_FLAGS}") -ENDIF(C_AVX_FOUND) -IF(C_AVX2_FOUND) - MESSAGE(STATUS "AVX2 Found") - SET(CMAKE_C_FLAGS "-DUSE_AVX2 ${CMAKE_C_FLAGS}") -ENDIF(C_AVX2_FOUND) -ENDIF() - -CHECK_C_SOURCE_RUNS(" -#include <stdatomic.h> -int main() -{ - int a; - int oa; - atomic_store(&a, 1); - atomic_fetch_add(&a, 1); - oa = atomic_load(&a); - if(!atomic_compare_exchange_strong(&a, &oa, 3)) - return -1; - return 0; -} -" HAS_C11_ATOMICS) - -IF(NOT HAS_C11_ATOMICS) - CHECK_C_SOURCE_RUNS(" -#include <intrin.h> -int main() -{ - long a; - _InterlockedExchange(&a, 1); - _InterlockedExchangeAdd(&a, 1); - if(_InterlockedCompareExchange(&a, 3, 2) != 2) - return -1; - return 0; -} -" HAS_MSC_ATOMICS) - - CHECK_C_SOURCE_RUNS(" -int main() -{ - int a; - __sync_lock_test_and_set(&a, 1); - __sync_fetch_and_add(&a, 1); - if(!__sync_bool_compare_and_swap(&a, 2, 3)) - return -1; - return 0; -} -" HAS_GCC_ATOMICS) -ENDIF() - -####################################################################### -##### sources section -###################################################################### - -# IF ANY SIMD FOUND -IF ("${ARCH}" STREQUAL "x86_64") - SET(simd generic/simd/convolve.c generic/simd/convolve5x5_sse.c) - SET(CMAKE_C_FLAGS "-DUSE_SSE2 ${CMAKE_C_FLAGS}") - SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES 
COMPILE_FLAGS "-O3 -ffast-math") -ENDIF() - - -# IF AVX FOUND -IF(FALSE) -IF(C_AVX_FOUND) - IF(MSVC) - SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast ${C_AVX_FLAGS}") - SET_SOURCE_FILES_PROPERTIES(vector/AVX.c PROPERTIES COMPILE_FLAGS "/Ox /arch:AVX ${C_AVX_FLAGS}") - ELSE(MSVC) - SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "-O3 -ffast-math ${C_AVX_FLAGS}") - SET_SOURCE_FILES_PROPERTIES(vector/AVX.c PROPERTIES COMPILE_FLAGS "-O3 ${C_AVX_FLAGS}") - ENDIF(MSVC) - SET(simd ${simd} vector/AVX.c generic/simd/convolve5x5_avx.c) -ENDIF(C_AVX_FOUND) - -IF(C_AVX2_FOUND) - IF(MSVC) - SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "/Ox /arch:AVX2 ${C_AVX2_FLAGS}") - ELSE(MSVC) - SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS "-O3 ${C_AVX2_FLAGS}") - ENDIF(MSVC) - SET(simd ${simd} vector/AVX2.c) -ENDIF(C_AVX2_FOUND) -ENDIF() - -SET(hdr - THGeneral.h THHalf.h THAllocator.h THSize.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h - THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h ) - -SET(src - THGeneral.c THHalf.c THAllocator.c THSize.c THStorage.c THTensor.c THBlas.c THLapack.c - THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c) - -SET(src ${src} ${hdr} ${simd}) - -####################################################################### -##### build section -###################################################################### - -ADD_TORCH_LIBRARY(TH SHARED "${src}") - -IF(HAS_C11_ATOMICS) - ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1) - MESSAGE(STATUS "Atomics: using C11 intrinsics") -ELSEIF(HAS_MSC_ATOMICS) - ADD_DEFINITIONS(-DUSE_MSC_ATOMICS=1) - MESSAGE(STATUS "Atomics: using MSVC intrinsics") -ELSEIF(HAS_GCC_ATOMICS) - ADD_DEFINITIONS(-DUSE_GCC_ATOMICS=1) - MESSAGE(STATUS "Atomics: using GCC intrinsics") -ELSE() - SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) - FIND_PACKAGE(Threads) - IF(THREADS_FOUND) - 
ADD_DEFINITIONS(-DUSE_PTHREAD_ATOMICS=1) - TARGET_LINK_LIBRARIES(TH ${CMAKE_THREAD_LIBS_INIT}) - MESSAGE(STATUS "Atomics: using pthread") - ENDIF() -ENDIF() - -FIND_PACKAGE(BLAS) -IF(BLAS_FOUND) - SET(USE_BLAS 1) - TARGET_LINK_LIBRARIES(TH ${BLAS_LIBRARIES}) - IF(BLAS_INFO STREQUAL "mkl") - ADD_DEFINITIONS(-DTH_BLAS_MKL) - ELSEIF(BLAS_INFO STREQUAL "open") - ADD_DEFINITIONS(-DTH_BLAS_OPEN) - ENDIF() -ENDIF(BLAS_FOUND) - -FIND_PACKAGE(LAPACK) -IF(LAPACK_FOUND) - SET(USE_LAPACK 1) - TARGET_LINK_LIBRARIES(TH ${LAPACK_LIBRARIES}) -ENDIF(LAPACK_FOUND) - -IF (UNIX AND NOT APPLE) - INCLUDE(CheckLibraryExists) - # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830 - CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" NEED_LIBRT) - IF(NEED_LIBRT) - TARGET_LINK_LIBRARIES(TH rt) - SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt) - ENDIF(NEED_LIBRT) -ENDIF(UNIX AND NOT APPLE) - -IF(UNIX) - SET(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h") - CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) - IF(HAVE_MMAP) - ADD_DEFINITIONS(-DHAVE_MMAP=1) - ENDIF(HAVE_MMAP) - # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html - ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64) - CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN) - IF(HAVE_SHM_OPEN) - ADD_DEFINITIONS(-DHAVE_SHM_OPEN=1) - ENDIF(HAVE_SHM_OPEN) - CHECK_FUNCTION_EXISTS(shm_unlink HAVE_SHM_UNLINK) - IF(HAVE_SHM_UNLINK) - ADD_DEFINITIONS(-DHAVE_SHM_UNLINK=1) - ENDIF(HAVE_SHM_UNLINK) - CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE) - IF(HAVE_MALLOC_USABLE_SIZE) - ADD_DEFINITIONS(-DHAVE_MALLOC_USABLE_SIZE=1) - ENDIF(HAVE_MALLOC_USABLE_SIZE) -ENDIF(UNIX) - -IF(NOT MSVC) - TARGET_LINK_LIBRARIES(TH m) -ENDIF(NOT MSVC) - -# Is __thread supported? 
-IF(NOT MSVC) - CHECK_C_SOURCE_COMPILES("static __thread int x = 1; int main() { return x; }" C_HAS_THREAD) -ELSE(NOT MSVC) - CHECK_C_SOURCE_COMPILES("static __declspec( thread ) int x = 1; int main() { return x; }" C_HAS_THREAD) -ENDIF(NOT MSVC) -IF(NOT C_HAS_THREAD) - MESSAGE(STATUS "Warning: __thread is not supported, generating thread-unsafe code") -ELSE(NOT C_HAS_THREAD) - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTH_HAVE_THREAD") -ENDIF(NOT C_HAS_THREAD) - -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") -CONFIGURE_FILE(THGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h") diff --git a/contrib/lua-torch/torch7/lib/TH/README.md b/contrib/lua-torch/torch7/lib/TH/README.md deleted file mode 100644 index 4ac26c103..000000000 --- a/contrib/lua-torch/torch7/lib/TH/README.md +++ /dev/null @@ -1,11 +0,0 @@ -Environment variables control the disabling of certain explicit SIMD optimizations. - -``` -x64 options: -TH_NO_AVX2=1 # disable AVX2 codepaths -TH_NO_AVX=1 # disable AVX codepaths -TH_NO_SSE=1 # disable SSE codepaths - -ppc64le options: -TH_NO_VSX=1 # disable VSX codepaths -``` diff --git a/contrib/lua-torch/torch7/lib/TH/TH.h b/contrib/lua-torch/torch7/lib/TH/TH.h deleted file mode 100644 index 11f208c4b..000000000 --- a/contrib/lua-torch/torch7/lib/TH/TH.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef TH_INC -#define TH_INC - -#include "THGeneral.h" - -#include "THBlas.h" -#ifdef USE_LAPACK -#include "THLapack.h" -#endif - -#include "THAtomic.h" -#include "THVector.h" -#include "THLogAdd.h" -#include "THRandom.h" -#include "THSize.h" -#include "THStorage.h" -#include "THTensor.h" -#include "THTensorApply.h" -#include "THTensorDimApply.h" - -#include "THFile.h" -#include "THDiskFile.h" -#include "THMemoryFile.h" - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THAllocator.c b/contrib/lua-torch/torch7/lib/TH/THAllocator.c deleted file mode 100644 index 51ac69b94..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THAllocator.c +++ /dev/null @@ -1,500 +0,0 @@ 
-#include "THAllocator.h" -#include "THAtomic.h" - -/* stuff for mapped files */ -#ifdef _WIN32 -#include <windows.h> -#endif - -#if HAVE_MMAP -#include <sys/types.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#endif -/* end of stuff for mapped files */ - -static void *THDefaultAllocator_alloc(void* ctx, ptrdiff_t size) { - return THAlloc(size); -} - -static void *THDefaultAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { - return THRealloc(ptr, size); -} - -static void THDefaultAllocator_free(void* ctx, void* ptr) { - THFree(ptr); -} - -THAllocator THDefaultAllocator = { - &THDefaultAllocator_alloc, - &THDefaultAllocator_realloc, - &THDefaultAllocator_free -}; - -#if defined(_WIN32) || defined(HAVE_MMAP) - -struct THMapAllocatorContext_ { - char *filename; /* file name */ - int flags; - ptrdiff_t size; /* mapped size */ - int fd; -}; - -#define TH_ALLOC_ALIGNMENT 64 - -typedef struct { - int refcount; -} THMapInfo; - -char * unknown_filename = "filename not specified"; - -THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) -{ - THMapAllocatorContext *ctx = THAlloc(sizeof(THMapAllocatorContext)); - - if (!(flags & TH_ALLOCATOR_MAPPED_SHARED) && !(flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)) - flags &= ~TH_ALLOCATOR_MAPPED_NOCREATE; - if ((flags ^ TH_ALLOCATOR_MAPPED_EXCLUSIVE) == 0) - THError("TH_ALLOCATOR_MAPPED_EXCLUSIVE flag requires opening the file " - "in shared mode"); - - if (filename) { - ctx->filename = THAlloc(strlen(filename)+1); - strcpy(ctx->filename, filename); - } else { - ctx->filename = unknown_filename; - } - ctx->flags = flags; - ctx->size = 0; - ctx->fd = -1; - - return ctx; -} - -THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, int fd, int flags) -{ - THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags); - ctx->fd = fd; - - return ctx; -} - -char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx) -{ - return 
ctx->filename; -} - -int THMapAllocatorContext_fd(THMapAllocatorContext *ctx) -{ - return ctx->fd; -} - -ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx) -{ - return ctx->size; -} - -void THMapAllocatorContext_free(THMapAllocatorContext *ctx) -{ - if (ctx->filename != unknown_filename) - THFree(ctx->filename); - THFree(ctx); -} - -static void *_map_alloc(void* ctx_, ptrdiff_t size) -{ - THMapAllocatorContext *ctx = ctx_; - void *data = NULL; - -#ifdef _WIN32 - { - HANDLE hfile; - HANDLE hmfile; - LARGE_INTEGER hfilesz; - - if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE) - THError("exclusive file mapping is not supported on Windows"); - if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE) - THError("file mapping without creation is not supported on Windows"); - if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) - THError("TH_ALLOCATOR_MAPPED_KEEPFD not supported on Windows"); - if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD) - THError("TH_ALLOCATOR_MAPPED_FROMFD not supported on Windows"); - - /* open file */ - /* FILE_FLAG_RANDOM_ACCESS ? 
*/ - if(ctx->flags) - { - hfile = CreateFileA(ctx->filename, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); - if (hfile == INVALID_HANDLE_VALUE) - THError("could not open file <%s> in read-write mode; error code: <%d>", ctx->filename, GetLastError()); - } - else - { - hfile = CreateFileA(ctx->filename, GENERIC_READ, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - if (hfile == INVALID_HANDLE_VALUE) - THError("could not open file <%s> in read-only mode; error code: <%d>", ctx->filename, GetLastError()); - } - - if (GetFileSizeEx(hfile, &hfilesz) == 0) - { - THError("could not get file size: <%s>; error code: <%d>", ctx->filename, GetLastError()); - } - - if(size > 0) - { - if(size > hfilesz.QuadPart) - { - if(ctx->flags) - { - hfilesz.QuadPart = size; - if(SetFilePointerEx(hfile, hfilesz, NULL, FILE_BEGIN) == 0) - { - CloseHandle(hfile); - THError("unable to stretch file <%s> to the right size; error code: <%d>", ctx->filename, GetLastError()); - } - if(SetEndOfFile(hfile) == 0) - { - CloseHandle(hfile); - THError("unable to write to file <%s>; error code: <%d>", ctx->filename, GetLastError()); - } - } - else - { - CloseHandle(hfile); - THError("file <%s> size is smaller than the required mapping size <%ld>; error code: <%d>", ctx->filename, size, GetLastError()); - } - } - } - else - size = hfilesz.QuadPart; - - ctx->size = size; /* if we are here, it must be the right size */ - - hfilesz.QuadPart = ctx->size; - - /* get map handle */ - if(ctx->flags) - { - if( (hmfile = CreateFileMapping(hfile, NULL, PAGE_READWRITE, hfilesz.HighPart, hfilesz.LowPart, NULL)) == NULL ) - THError("could not create a map on file <%s>; error code: <%d>", ctx->filename, GetLastError()); - } - else - { - if( (hmfile = CreateFileMapping(hfile, NULL, PAGE_WRITECOPY, hfilesz.HighPart, hfilesz.LowPart, NULL)) == NULL ) - THError("could not create a map on file <%s>; error code: <%d>", ctx->filename, 
GetLastError()); - } - - /* map the stuff */ - if(ctx->flags) - data = MapViewOfFile(hmfile, FILE_MAP_ALL_ACCESS, 0, 0, 0); - else - data = MapViewOfFile(hmfile, FILE_MAP_COPY, 0, 0, 0); - - CloseHandle(hfile); - CloseHandle(hmfile); - } -#else /* _WIN32 */ - { - /* open file */ - int fd; - int flags; - struct stat file_stat; - - if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM)) - flags = O_RDWR | O_CREAT; - else - flags = O_RDONLY; - - if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE) - flags |= O_EXCL; - if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE) - flags &= ~O_CREAT; - - if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)) { - if(ctx->flags & TH_ALLOCATOR_MAPPED_SHARED) - { - if((fd = open(ctx->filename, flags, (mode_t)0600)) == -1) - THError("unable to open file <%s> in read-write mode", ctx->filename); - } - else if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) - { -#ifdef HAVE_SHM_OPEN - if((fd = shm_open(ctx->filename, flags, (mode_t)0600)) == -1) - THError("unable to open shared memory object <%s> in read-write mode", ctx->filename); -#else - THError("unable to open file <%s> in sharedmem mode, shm_open unavailable on this platform", ctx->filename); -#endif - } - else - { - if((fd = open(ctx->filename, O_RDONLY)) == -1) - THError("unable to open file <%s> in read-only mode", ctx->filename); - } - } else { - fd = ctx->fd; - } - - if(fstat(fd, &file_stat) == -1) - { - if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)) - close(fd); - THError("unable to stat the file <%s>", ctx->filename); - } - - if(size > 0) - { - if(size > file_stat.st_size) - { - if(ctx->flags) - { - if(ftruncate(fd, size) == -1) - THError("unable to resize file <%s> to the right size", ctx->filename); - if(fstat(fd, &file_stat) == -1 || file_stat.st_size < size) - { - close(fd); - THError("unable to stretch file <%s> to the right size", ctx->filename); - } -/* on OS X write returns with errno 45 (Opperation not supported) when used - * with a file descriptor obtained 
via shm_open - */ -#ifndef __APPLE__ - if((write(fd, "", 1)) != 1) /* note that the string "" contains the '\0' byte ... */ - { - close(fd); - THError("unable to write to file <%s>", ctx->filename); - } -#endif - } - else - { - close(fd); - THError("file <%s> size is smaller than the required mapping size <%ld>", ctx->filename, size); - } - } - } - else - size = file_stat.st_size; - - ctx->size = size; /* if we are here, it must be the right size */ - - /* map it */ - if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM)) - data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - else - data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - - if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) { - ctx->fd = fd; - } else { - if(close(fd) == -1) - THError("Error closing file <%s>", ctx->filename); - ctx->fd = -1; - } - - if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK) { - if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) - { -#ifdef HAVE_SHM_UNLINK - if (shm_unlink(ctx->filename) == -1) - THError("could not unlink the shared memory file %s", ctx->filename); -#else - THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename); -#endif - } - else - { - if (unlink(ctx->filename) == -1) - THError("could not unlink file %s", ctx->filename); - } - } - - if(data == MAP_FAILED) - { - data = NULL; /* let's be sure it is NULL */ - THError("$ Torch: unable to mmap memory: you tried to mmap %dGB.", ctx->size/1073741824); - } - } -#endif - - return data; -} - -static void * THMapAllocator_alloc(void *ctx, ptrdiff_t size) { - return _map_alloc(ctx, size); -} - -static void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { - THError("cannot realloc mapped data"); - return NULL; -} - -static void THMapAllocator_free(void* ctx_, void* data) { - THMapAllocatorContext *ctx = ctx_; - -#ifdef _WIN32 - if(UnmapViewOfFile(data) == 0) - THError("could not unmap the shared memory 
file"); -#else /* _WIN32 */ - if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) { - if (close(ctx->fd) == -1) - THError("could not close file descriptor %d", ctx->fd); - } - - if (munmap(data, ctx->size)) - THError("could not unmap the shared memory file"); - - if (!(ctx->flags & (TH_ALLOCATOR_MAPPED_FROMFD | TH_ALLOCATOR_MAPPED_UNLINK))) - { - if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM) - { -#ifdef HAVE_SHM_UNLINK - if (shm_unlink(ctx->filename) == -1) - THError("could not unlink the shared memory file %s", ctx->filename); -#else - THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename); -#endif - } - } -#endif /* _WIN32 */ - - THMapAllocatorContext_free(ctx); -} - -#else - -THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) { - THError("file mapping not supported on your system"); - return NULL; -} - -void THMapAllocatorContext_free(THMapAllocatorContext *ctx) { - THError("file mapping not supported on your system"); -} - -static void *THMapAllocator_alloc(void* ctx_, ptrdiff_t size) { - THError("file mapping not supported on your system"); - return NULL; -} - -static void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { - THError("file mapping not supported on your system"); - return NULL; -} - -static void THMapAllocator_free(void* ctx, void* data) { - THError("file mapping not supported on your system"); -} - -#endif - -#if (defined(_WIN32) || defined(HAVE_MMAP)) && defined(TH_ATOMIC_IPC_REFCOUNT) - -static void * THRefcountedMapAllocator_alloc(void *_ctx, ptrdiff_t size) { - THMapAllocatorContext *ctx = _ctx; - - if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD) - THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_FROMFD flag"); - if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) - THError("THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_KEEPFD flag"); - if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK) - THError("THRefcountedMapAllocator doesn't 
support TH_ALLOCATOR_MAPPED_UNLINK flag"); - if (!(ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)) - THError("THRefcountedMapAllocator requires TH_ALLOCATOR_MAPPED_SHAREDMEM flag"); - - size = size + TH_ALLOC_ALIGNMENT; - void *ptr = _map_alloc(ctx, size); - char *data = ((char*)ptr) + TH_ALLOC_ALIGNMENT; - THMapInfo *map_info = (THMapInfo*)ptr; - - if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE) - map_info->refcount = 1; - else - THAtomicIncrementRef(&map_info->refcount); - - return (void*)data; -} - -static void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { - THError("cannot realloc mapped data"); - return NULL; -} - -static void THRefcountedMapAllocator_free(void* ctx_, void* data) { - THMapAllocatorContext *ctx = ctx_; - -#ifdef _WIN32 - if(UnmapViewOfFile(data) == 0) - THError("could not unmap the shared memory file"); -#else /* _WIN32 */ - - THMapInfo *info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); - if (THAtomicDecrementRef(&info->refcount)) { -#ifdef HAVE_SHM_UNLINK - if (shm_unlink(ctx->filename) == -1) - THError("could not unlink the shared memory file %s", ctx->filename); -#else - THError("could not unlink the shared memory file %s, shm_unlink not available on platform", ctx->filename); -#endif /* HAVE_SHM_UNLINK */ - } - if (munmap(info, ctx->size)) - THError("could not unmap the shared memory file %s", ctx->filename); -#endif /* _WIN32 */ - - THMapAllocatorContext_free(ctx); -} - -void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data) -{ - THMapInfo *map_info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); - THAtomicIncrementRef(&map_info->refcount); -} - -int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data) -{ - THMapInfo *map_info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT); - return THAtomicDecrementRef(&map_info->refcount); -} - -#else - -static void * THRefcountedMapAllocator_alloc(void *ctx, ptrdiff_t size) { - THError("refcounted file mapping not supported 
on your system"); - return NULL; -} - -static void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) { - THError("refcounted file mapping not supported on your system"); - return NULL; -} - -static void THRefcountedMapAllocator_free(void* ctx_, void* data) { - THError("refcounted file mapping not supported on your system"); -} - -void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data) -{ - THError("refcounted file mapping not supported on your system"); -} - -int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data) -{ - THError("refcounted file mapping not supported on your system"); - return 0; -} - -#endif - -THAllocator THMapAllocator = { - &THMapAllocator_alloc, - &THMapAllocator_realloc, - &THMapAllocator_free -}; - -THAllocator THRefcountedMapAllocator = { - &THRefcountedMapAllocator_alloc, - &THRefcountedMapAllocator_realloc, - &THRefcountedMapAllocator_free -}; diff --git a/contrib/lua-torch/torch7/lib/TH/THAllocator.h b/contrib/lua-torch/torch7/lib/TH/THAllocator.h deleted file mode 100644 index 18fc9ec0a..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THAllocator.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef TH_ALLOCATOR_INC -#define TH_ALLOCATOR_INC - -#include "THGeneral.h" - -#define TH_ALLOCATOR_MAPPED_SHARED 1 -#define TH_ALLOCATOR_MAPPED_SHAREDMEM 2 -#define TH_ALLOCATOR_MAPPED_EXCLUSIVE 4 -#define TH_ALLOCATOR_MAPPED_NOCREATE 8 -#define TH_ALLOCATOR_MAPPED_KEEPFD 16 -#define TH_ALLOCATOR_MAPPED_FROMFD 32 -#define TH_ALLOCATOR_MAPPED_UNLINK 64 - -/* Custom allocator - */ -typedef struct THAllocator { - void* (*malloc)(void*, ptrdiff_t); - void* (*realloc)(void*, void*, ptrdiff_t); - void (*free)(void*, void*); -} THAllocator; - -/* default malloc/free allocator. malloc and realloc raise an error (using - * THError) on allocation failure. 
- */ -extern THAllocator THDefaultAllocator; - -/* file map allocator - */ -typedef struct THMapAllocatorContext_ THMapAllocatorContext; -TH_API THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags); -TH_API THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, - int fd, int flags); -TH_API char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx); -TH_API int THMapAllocatorContext_fd(THMapAllocatorContext *ctx); -TH_API ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx); -TH_API void THMapAllocatorContext_free(THMapAllocatorContext *ctx); -TH_API void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data); -TH_API int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data); - -extern THAllocator THMapAllocator; -extern THAllocator THRefcountedMapAllocator; - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THAtomic.c b/contrib/lua-torch/torch7/lib/TH/THAtomic.c deleted file mode 100644 index 714fc52db..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THAtomic.c +++ /dev/null @@ -1,267 +0,0 @@ -#include "THAtomic.h" - -/* - Note: I thank Leon Bottou for his useful comments. - Ronan. 
-*/ - -#if defined(USE_C11_ATOMICS) -#include <stdatomic.h> -#endif - -#if defined(USE_MSC_ATOMICS) -#include <intrin.h> -#include <assert.h> -#endif - -#if !defined(USE_MSC_ATOMICS) && !defined(USE_GCC_ATOMICS) && defined(USE_PTHREAD_ATOMICS) -#include <pthread.h> -static pthread_mutex_t ptm = PTHREAD_MUTEX_INITIALIZER; -#endif - -void THAtomicSet(int volatile *a, int newvalue) -{ -#if defined(USE_C11_ATOMICS) - atomic_store(a, newvalue); -#elif defined(USE_MSC_ATOMICS) - assert(sizeof(int) == sizeof(long)); - _InterlockedExchange((long*)a, newvalue); -#elif defined(USE_GCC_ATOMICS) - __sync_lock_test_and_set(a, newvalue); -#else - int oldvalue; - do { - oldvalue = *a; - } while (!THAtomicCompareAndSwap(a, oldvalue, newvalue)); -#endif -} - -int THAtomicGet(int volatile *a) -{ -#if defined(USE_C11_ATOMICS) - return atomic_load(a); -#else - int value; - do { - value = *a; - } while (!THAtomicCompareAndSwap(a, value, value)); - return value; -#endif -} - -int THAtomicAdd(int volatile *a, int value) -{ -#if defined(USE_C11_ATOMICS) - return atomic_fetch_add(a, value); -#elif defined(USE_MSC_ATOMICS) - assert(sizeof(int) == sizeof(long)); - return _InterlockedExchangeAdd((long*)a, value); -#elif defined(USE_GCC_ATOMICS) - return __sync_fetch_and_add(a, value); -#else - int oldvalue; - do { - oldvalue = *a; - } while (!THAtomicCompareAndSwap(a, oldvalue, (oldvalue + value))); - return oldvalue; -#endif -} - -void THAtomicIncrementRef(int volatile *a) -{ - THAtomicAdd(a, 1); -} - -int THAtomicDecrementRef(int volatile *a) -{ - return (THAtomicAdd(a, -1) == 1); -} - -int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue) -{ -#if defined(USE_C11_ATOMICS) - return atomic_compare_exchange_strong(a, &oldvalue, newvalue); -#elif defined(USE_MSC_ATOMICS) - assert(sizeof(int) == sizeof(long)); - return (_InterlockedCompareExchange((long*)a, (long)newvalue, (long)oldvalue) == (long)oldvalue); -#elif defined(USE_GCC_ATOMICS) - return 
__sync_bool_compare_and_swap(a, oldvalue, newvalue); -#elif defined(USE_PTHREAD_ATOMICS) - int ret = 0; - pthread_mutex_lock(&ptm); - if(*a == oldvalue) { - *a = newvalue; - ret = 1; - } - pthread_mutex_unlock(&ptm); - return ret; -#else -#warning THAtomic is not thread safe - if(*a == oldvalue) { - *a = newvalue; - return 1; - } - else - return 0; -#endif -} - -void THAtomicSetLong(long volatile *a, long newvalue) -{ -#if defined(USE_C11_ATOMICS) - atomic_store(a, newvalue); -#elif defined(USE_MSC_ATOMICS) - _InterlockedExchange(a, newvalue); -#elif defined(USE_GCC_ATOMICS) - __sync_lock_test_and_set(a, newvalue); -#else - long oldvalue; - do { - oldvalue = *a; - } while (!THAtomicCompareAndSwapLong(a, oldvalue, newvalue)); -#endif -} - -long THAtomicGetLong(long volatile *a) -{ -#if defined(USE_C11_ATOMICS) - return atomic_load(a); -#else - long value; - do { - value = *a; - } while (!THAtomicCompareAndSwapLong(a, value, value)); - return value; -#endif -} - -long THAtomicAddLong(long volatile *a, long value) -{ -#if defined(USE_C11_ATOMICS) - return atomic_fetch_add(a, value); -#elif defined(USE_MSC_ATOMICS) - return _InterlockedExchangeAdd(a, value); -#elif defined(USE_GCC_ATOMICS) - return __sync_fetch_and_add(a, value); -#else - long oldvalue; - do { - oldvalue = *a; - } while (!THAtomicCompareAndSwapLong(a, oldvalue, (oldvalue + value))); - return oldvalue; -#endif -} - -long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue) -{ -#if defined(USE_C11_ATOMICS) - return atomic_compare_exchange_strong(a, &oldvalue, newvalue); -#elif defined(USE_MSC_ATOMICS) - return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue); -#elif defined(USE_GCC_ATOMICS) - return __sync_bool_compare_and_swap(a, oldvalue, newvalue); -#elif defined(USE_PTHREAD_ATOMICS) - long ret = 0; - pthread_mutex_lock(&ptm); - if(*a == oldvalue) { - *a = newvalue; - ret = 1; - } - pthread_mutex_unlock(&ptm); - return ret; -#else -#warning THAtomic is not thread 
safe - if(*a == oldvalue) { - *a = newvalue; - return 1; - } - else - return 0; -#endif -} - -void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue) -{ -#if defined(USE_C11_ATOMICS) - atomic_store(a, newvalue); -#elif defined(USE_MSC_ATOMICS) -#ifdef _WIN64 - _InterlockedExchange64(a, newvalue); -#else - _InterlockedExchange(a, newvalue); -#endif -#elif defined(USE_GCC_ATOMICS) - __sync_lock_test_and_set(a, newvalue); -#else - ptrdiff_t oldvalue; - do { - oldvalue = *a; - } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, newvalue)); -#endif -} - -ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a) -{ -#if defined(USE_C11_ATOMICS) - return atomic_load(a); -#else - ptrdiff_t value; - do { - value = *a; - } while (!THAtomicCompareAndSwapPtrdiff(a, value, value)); - return value; -#endif -} - -ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value) -{ -#if defined(USE_C11_ATOMICS) - return atomic_fetch_add(a, value); -#elif defined(USE_MSC_ATOMICS) -#ifdef _WIN64 - return _InterlockedExchangeAdd64(a, value); -#else - return _InterlockedExchangeAdd(a, value); -#endif -#elif defined(USE_GCC_ATOMICS) - return __sync_fetch_and_add(a, value); -#else - ptrdiff_t oldvalue; - do { - oldvalue = *a; - } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, (oldvalue + value))); - return oldvalue; -#endif -} - -ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue) -{ -#if defined(USE_C11_ATOMICS) - return atomic_compare_exchange_strong(a, &oldvalue, newvalue); -#elif defined(USE_MSC_ATOMICS) -#ifdef _WIN64 - return (_InterlockedCompareExchange64(a, newvalue, oldvalue) == oldvalue); -#else - return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue); -#endif -#elif defined(USE_GCC_ATOMICS) - return __sync_bool_compare_and_swap(a, oldvalue, newvalue); -#elif defined(USE_PTHREAD_ATOMICS) - ptrdiff_t ret = 0; - pthread_mutex_lock(&ptm); - if(*a == oldvalue) { - *a = newvalue; - ret = 1; - } - 
pthread_mutex_unlock(&ptm); - return ret; -#else -#warning THAtomic is not thread safe - if(*a == oldvalue) { - *a = newvalue; - return 1; - } - else - return 0; -#endif -} diff --git a/contrib/lua-torch/torch7/lib/TH/THAtomic.h b/contrib/lua-torch/torch7/lib/TH/THAtomic.h deleted file mode 100644 index d77b20b24..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THAtomic.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef TH_ATOMIC_INC -#define TH_ATOMIC_INC - -#include "THGeneral.h" - -/****************************************************************************** - * Atomic operations for TH - * Five backends are integrated: - * - C11 atomic operations - * - MSVC intrinsics - * - GCC intrinsics - * - Pthread if none of the above is available - * - Unsafe mode in none of the above is available - ******************************************************************************/ - - -/****************************************************************************** - * all-purpose functions - ******************************************************************************/ - -/* - * *a = newvalue -*/ -TH_API void THAtomicSet(int volatile *a, int newvalue); - -/* - * return *a -*/ -TH_API int THAtomicGet(int volatile *a); - -/* - * *a += value, - * return previous *a -*/ -TH_API int THAtomicAdd(int volatile *a, int value); - -/* - * check if (*a == oldvalue) - * if true: set *a to newvalue, return 1 - * if false: return 0 -*/ -TH_API int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue); - - -/****************************************************************************** - * refcounting functions - ******************************************************************************/ - -/* - * *a++ -*/ -TH_API void THAtomicIncrementRef(int volatile *a); - -/* - * *a--, - * return 1 if *a == 0 after the operation, 0 otherwise -*/ -TH_API int THAtomicDecrementRef(int volatile *a); - - - -/****************************************************************************** - * 
functions for long type - ******************************************************************************/ - -/* - * *a = newvalue -*/ -TH_API void THAtomicSetLong(long volatile *a, long newvalue); - -/* - * return *a -*/ -TH_API long THAtomicGetLong(long volatile *a); - -/* - * *a += value, - * return previous *a -*/ -TH_API long THAtomicAddLong(long volatile *a, long value); - -/* - * check if (*a == oldvalue) - * if true: set *a to newvalue, return 1 - * if false: return 0 -*/ -TH_API long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue); - - - -/****************************************************************************** - * functions for ptrdiff_t type - ******************************************************************************/ - -/* - * *a = newvalue -*/ -TH_API void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue); - -/* - * return *a -*/ -TH_API ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a); - -/* - * *a += value, - * return previous *a -*/ -TH_API ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value); - -/* - * check if (*a == oldvalue) - * if true: set *a to newvalue, return 1 - * if false: return 0 -*/ -TH_API ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue); - -#if defined(USE_C11_ATOMICS) && defined(ATOMIC_INT_LOCK_FREE) && \ - ATOMIC_INT_LOCK_FREE == 2 -#define TH_ATOMIC_IPC_REFCOUNT 1 -#elif defined(USE_MSC_ATOMICS) || defined(USE_GCC_ATOMICS) -#define TH_ATOMIC_IPC_REFCOUNT 1 -#endif - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THBlas.c b/contrib/lua-torch/torch7/lib/TH/THBlas.c deleted file mode 100644 index 35618b26a..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THBlas.c +++ /dev/null @@ -1,4 +0,0 @@ -#include "THBlas.h" - -#include "generic/THBlas.c" -#include "THGenerateAllTypes.h" diff --git a/contrib/lua-torch/torch7/lib/TH/THBlas.h b/contrib/lua-torch/torch7/lib/TH/THBlas.h deleted file mode 100644 index 
5fef0febc..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THBlas.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef TH_BLAS_INC -#define TH_BLAS_INC - -#include "THGeneral.h" - -#define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME) - -#include "generic/THBlas.h" -#include "THGenerateAllTypes.h" - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in b/contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in deleted file mode 100644 index 306cd878b..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THConfig.cmake.in +++ /dev/null @@ -1,9 +0,0 @@ -# Find the TH includes and library -# -# TH_INCLUDE_DIR -- where to find the includes -# TH_LIBRARIES -- list of libraries to link against -# TH_FOUND -- set to 1 if found - -SET(TH_FOUND 1) -SET(TH_INCLUDE_DIR "@TH_INCLUDE_DIR@") -SET(TH_LIBRARIES "@TH_LIBRARIES@") diff --git a/contrib/lua-torch/torch7/lib/TH/THDiskFile.c b/contrib/lua-torch/torch7/lib/TH/THDiskFile.c deleted file mode 100644 index 3f57b3b35..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THDiskFile.c +++ /dev/null @@ -1,797 +0,0 @@ -#include "THGeneral.h" -#include "THDiskFile.h" -#include "THFilePrivate.h" - -#include <stdint.h> -#ifndef LLONG_MAX -#define LLONG_MAX 9223372036854775807LL -#endif - -typedef struct THDiskFile__ -{ - THFile file; - - FILE *handle; - char *name; - int isNativeEncoding; - int longSize; - -} THDiskFile; - -static int THDiskFile_isOpened(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)self; - return (dfself->handle != NULL); -} - -const char *THDiskFile_name(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)self; - return dfself->name; -} - -/* workaround mac osx lion ***insane*** fread bug */ -#ifdef __APPLE__ -size_t fread__(void *ptr, size_t size, size_t nitems, FILE *stream) -{ - size_t nread = 0; - while(!feof(stream) && !ferror(stream) && (nread < nitems)) - nread += fread((char*)ptr+nread*size, size, THMin(2147483648/size, nitems-nread), stream); - return nread; -} -#else -#define fread__ fread -#endif - 
-#define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM) \ - static size_t THDiskFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \ - { \ - THDiskFile *dfself = (THDiskFile*)(self); \ - size_t nread = 0L; \ - \ - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); \ - THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); \ - \ - if(dfself->file.isBinary) \ - { \ - nread = fread__(data, sizeof(TYPE), n, dfself->handle); \ - if(!dfself->isNativeEncoding && (sizeof(TYPE) > 1) && (nread > 0)) \ - THDiskFile_reverseMemory(data, data, sizeof(TYPE), nread); \ - } \ - else \ - { \ - size_t i; \ - for(i = 0; i < n; i++) \ - { \ - ASCII_READ_ELEM; /* increment here result and break if wrong */ \ - } \ - if(dfself->file.isAutoSpacing && (n > 0)) \ - { \ - int c = fgetc(dfself->handle); \ - if( (c != '\n') && (c != EOF) ) \ - ungetc(c, dfself->handle); \ - } \ - } \ - \ - if(nread != n) \ - { \ - dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/ \ - if(!dfself->file.isQuiet) \ - THError("read error: read %d blocks instead of %d", nread, n); \ - } \ - \ - return nread; \ - } \ - \ - static size_t THDiskFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \ - { \ - THDiskFile *dfself = (THDiskFile*)(self); \ - size_t nwrite = 0L; \ - \ - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); \ - THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); \ - \ - if(dfself->file.isBinary) \ - { \ - if(dfself->isNativeEncoding) \ - { \ - nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle); \ - } \ - else \ - { \ - if(sizeof(TYPE) > 1) \ - { \ - char *buffer = THAlloc(sizeof(TYPE)*n); \ - THDiskFile_reverseMemory(buffer, data, sizeof(TYPE), n); \ - nwrite = fwrite(buffer, sizeof(TYPE), n, dfself->handle); \ - THFree(buffer); \ - } \ - else \ - nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle); \ - } \ - } \ - else \ - { \ - size_t i; \ - for(i = 0; i < n; i++) \ - { \ - ASCII_WRITE_ELEM; \ - if( dfself->file.isAutoSpacing && (i < n-1) ) \ - fprintf(dfself->handle, " "); \ - } \ - if(dfself->file.isAutoSpacing && (n > 0)) \ - fprintf(dfself->handle, "\n"); \ - } \ - \ - if(nwrite != n) \ - { \ - dfself->file.hasError = 1; \ - if(!dfself->file.isQuiet) \ - THError("write error: wrote %d blocks instead of %d", nwrite, n); \ - } \ - \ - return nwrite; \ -} - -static int THDiskFile_mode(const char *mode, int *isReadable, int *isWritable) -{ - *isReadable = 0; - *isWritable = 0; - if(strlen(mode) == 1) - { - if(*mode == 'r') - { - *isReadable = 1; - return 1; - } - else if(*mode == 'w') - { - *isWritable = 1; - return 1; - } - } - else if(strlen(mode) == 2) - { - if(mode[0] == 'r' && mode[1] == 'w') - { - *isReadable = 1; - *isWritable = 1; - return 1; - } - } - return 0; -} - -static void THDiskFile_synchronize(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - fflush(dfself->handle); 
-} - -static void THDiskFile_seek(THFile *self, size_t position) -{ - THDiskFile *dfself = (THDiskFile*)(self); - - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - -#if defined(_WIN64) - THArgCheck(position <= (size_t)INT64_MAX, 2, "position must be smaller than INT64_MAX"); - if(_fseeki64(dfself->handle, (__int64)position, SEEK_SET) < 0) -#elif defined(_WIN32) - THArgCheck(position <= (size_t)LONG_MAX, 2, "position must be smaller than LONG_MAX"); - if(fseek(dfself->handle, (long)position, SEEK_SET) < 0) -#else - THArgCheck(position <= (size_t)LLONG_MAX, 2, "position must be smaller than LLONG_MAX"); - if(fseeko(dfself->handle, (off_t)position, SEEK_SET) < 0) -#endif - { - dfself->file.hasError = 1; - if(!dfself->file.isQuiet) - THError("unable to seek to position %zu", position); - } -} - -static void THDiskFile_seekEnd(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - -#if defined(_WIN64) - if(_fseeki64(dfself->handle, 0, SEEK_END) < 0) -#elif defined(_WIN32) - if(fseek(dfself->handle, 0, SEEK_END) < 0) -#else - if(fseeko(dfself->handle, 0, SEEK_END) < 0) -#endif - { - dfself->file.hasError = 1; - if(!dfself->file.isQuiet) - THError("unable to seek at end of file"); - } -} - -static size_t THDiskFile_position(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - -#if defined(_WIN64) - __int64 offset = _ftelli64(dfself->handle); -#elif defined(_WIN32) - long offset = ftell(dfself->handle); -#else - off_t offset = ftello(dfself->handle); -#endif - if (offset > -1) - return (size_t)offset; - else if(!dfself->file.isQuiet) - THError("unable to obtain disk file offset (maybe a long overflow occurred)"); - - return 0; -} - -static void THDiskFile_close(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed 
file"); - fclose(dfself->handle); - dfself->handle = NULL; -} - -/* Little and Big Endian */ - -static void THDiskFile_reverseMemory(void *dst, const void *src, size_t blockSize, size_t numBlocks) -{ - if(blockSize > 1) - { - size_t halfBlockSize = blockSize/2; - char *charSrc = (char*)src; - char *charDst = (char*)dst; - size_t b, i; - for(b = 0; b < numBlocks; b++) - { - for(i = 0; i < halfBlockSize; i++) - { - char z = charSrc[i]; - charDst[i] = charSrc[blockSize-1-i]; - charDst[blockSize-1-i] = z; - } - charSrc += blockSize; - charDst += blockSize; - } - } -} - -int THDiskFile_isLittleEndianCPU(void) -{ - int x = 7; - char *ptr = (char *)&x; - - if(ptr[0] == 0) - return 0; - else - return 1; -} - -int THDiskFile_isBigEndianCPU(void) -{ - return(!THDiskFile_isLittleEndianCPU()); -} - -void THDiskFile_nativeEndianEncoding(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - dfself->isNativeEncoding = 1; -} - -void THDiskFile_littleEndianEncoding(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - dfself->isNativeEncoding = THDiskFile_isLittleEndianCPU(); -} - -void THDiskFile_bigEndianEncoding(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - dfself->isNativeEncoding = !THDiskFile_isLittleEndianCPU(); -} - -/* End of Little and Big Endian Stuff */ - -void THDiskFile_longSize(THFile *self, int size) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - THArgCheck(size == 0 || size == 4 || size == 8, 1, "Invalid long size specified"); - dfself->longSize = size; -} - -void THDiskFile_noBuffer(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - if 
(setvbuf(dfself->handle, NULL, _IONBF, 0)) { - THError("error: cannot disable buffer"); - } -} - -static void THDiskFile_free(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - if(dfself->handle) - fclose(dfself->handle); - THFree(dfself->name); - THFree(dfself); -} - -/* READ_WRITE_METHODS(int, Bool, */ -/* int value = 0; int ret = fscanf(file->handle, "%d", &value); array[i] = (value ? 1 : 0); if(ret <= 0) break; else result++, */ -/* int value = (array[i] ? 1 : 0); nElemWritten = fprintf(file->handle, "%d", value), */ -/* true) */ - -/* Note that we do a trick */ -READ_WRITE_METHODS(unsigned char, Byte, - nread = fread(data, 1, n, dfself->handle); break, - nwrite = fwrite(data, 1, n, dfself->handle); break) - -READ_WRITE_METHODS(char, Char, - nread = fread(data, 1, n, dfself->handle); break, - nwrite = fwrite(data, 1, n, dfself->handle); break) - -READ_WRITE_METHODS(short, Short, - int ret = fscanf(dfself->handle, "%hd", &data[i]); if(ret <= 0) break; else nread++, - int ret = fprintf(dfself->handle, "%hd", data[i]); if(ret <= 0) break; else nwrite++) - -READ_WRITE_METHODS(int, Int, - int ret = fscanf(dfself->handle, "%d", &data[i]); if(ret <= 0) break; else nread++, - int ret = fprintf(dfself->handle, "%d", data[i]); if(ret <= 0) break; else nwrite++) - -READ_WRITE_METHODS(float, Float, - int ret = fscanf(dfself->handle, "%g", &data[i]); if(ret <= 0) break; else nread++, - int ret = fprintf(dfself->handle, "%.9g", data[i]); if(ret <= 0) break; else nwrite++) - -READ_WRITE_METHODS(THHalf, Half, - float buf; int ret = fscanf(dfself->handle, "%g", &buf); if(ret <= 0) break; else { data[i]= TH_float2half(buf); nread++; }, - int ret = fprintf(dfself->handle, "%.9g", TH_half2float(data[i])); if(ret <= 0) break; else nwrite++) - -READ_WRITE_METHODS(double, Double, - int ret = fscanf(dfself->handle, "%lg", &data[i]); if(ret <= 0) break; else nread++, - int ret = fprintf(dfself->handle, "%.17g", data[i]); if(ret <= 0) break; else nwrite++) - - -/* For Long 
we need to rewrite everything, because of the special management of longSize */ -static size_t THDiskFile_readLong(THFile *self, long *data, size_t n) -{ - THDiskFile *dfself = (THDiskFile*)(self); - size_t nread = 0L; - - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); - - if(dfself->file.isBinary) - { - if(dfself->longSize == 0 || dfself->longSize == sizeof(long)) - { - nread = fread__(data, sizeof(long), n, dfself->handle); - if(!dfself->isNativeEncoding && (sizeof(long) > 1) && (nread > 0)) - THDiskFile_reverseMemory(data, data, sizeof(long), nread); - } else if(dfself->longSize == 4) - { - nread = fread__(data, 4, n, dfself->handle); - if(!dfself->isNativeEncoding && (nread > 0)) - THDiskFile_reverseMemory(data, data, 4, nread); - size_t i; - for(i = nread; i > 0; i--) - data[i-1] = ((int *)data)[i-1]; - } - else /* if(dfself->longSize == 8) */ - { - int big_endian = !THDiskFile_isLittleEndianCPU(); - int32_t *buffer = THAlloc(8*n); - nread = fread__(buffer, 8, n, dfself->handle); - size_t i; - for(i = nread; i > 0; i--) - data[i-1] = buffer[2*(i-1) + big_endian]; - THFree(buffer); - if(!dfself->isNativeEncoding && (nread > 0)) - THDiskFile_reverseMemory(data, data, 4, nread); - } - } - else - { - size_t i; - for(i = 0; i < n; i++) - { - int ret = fscanf(dfself->handle, "%ld", &data[i]); if(ret <= 0) break; else nread++; - } - if(dfself->file.isAutoSpacing && (n > 0)) - { - int c = fgetc(dfself->handle); - if( (c != '\n') && (c != EOF) ) - ungetc(c, dfself->handle); - } - } - - if(nread != n) - { - dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/ - if(!dfself->file.isQuiet) - THError("read error: read %d blocks instead of %d", nread, n); - } - - return nread; -} - -static size_t THDiskFile_writeLong(THFile *self, long *data, size_t n) -{ - THDiskFile *dfself = (THDiskFile*)(self); - size_t nwrite = 0L; - - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); - - if(dfself->file.isBinary) - { - if(dfself->longSize == 0 || dfself->longSize == sizeof(long)) - { - if(dfself->isNativeEncoding) - { - nwrite = fwrite(data, sizeof(long), n, dfself->handle); - } - else - { - char *buffer = THAlloc(sizeof(long)*n); - THDiskFile_reverseMemory(buffer, data, sizeof(long), n); - nwrite = fwrite(buffer, sizeof(long), n, dfself->handle); - THFree(buffer); - } - } else if(dfself->longSize == 4) - { - int32_t *buffer = THAlloc(4*n); - size_t i; - for(i = 0; i < n; i++) - buffer[i] = data[i]; - if(!dfself->isNativeEncoding) - THDiskFile_reverseMemory(buffer, buffer, 4, n); - nwrite = fwrite(buffer, 4, n, dfself->handle); - THFree(buffer); - } - else /* if(dfself->longSize == 8) */ - { - int big_endian = !THDiskFile_isLittleEndianCPU(); - int32_t *buffer = THAlloc(8*n); - size_t i; - for(i = 0; i < n; i++) - { - buffer[2*i + !big_endian] = 0; - buffer[2*i + big_endian] = data[i]; - } - if(!dfself->isNativeEncoding) - THDiskFile_reverseMemory(buffer, buffer, 8, n); - nwrite = fwrite(buffer, 8, n, dfself->handle); - THFree(buffer); - } - } - else - { - size_t i; - for(i = 0; i < n; i++) - { - int ret = fprintf(dfself->handle, "%ld", data[i]); if(ret <= 0) break; else nwrite++; - if( dfself->file.isAutoSpacing && (i < n-1) ) - fprintf(dfself->handle, " "); - } - if(dfself->file.isAutoSpacing && (n > 0)) - fprintf(dfself->handle, "\n"); - } - - if(nwrite != n) - { - dfself->file.hasError = 1; - if(!dfself->file.isQuiet) - THError("write error: wrote %d blocks instead of %d", nwrite, n); - } - - return nwrite; -} - -static size_t 
THDiskFile_readString(THFile *self, const char *format, char **str_) -{ - THDiskFile *dfself = (THDiskFile*)(self); - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - THArgCheck(dfself->file.isReadable, 1, "attempt to read in a write-only file"); - THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, "format must be '*a' or '*l'"); - -/* note: the string won't survive long, as it is copied into lua */ -/* so 1024 is not that big... */ -#define TBRS_BSZ 1024L - - if(format[1] == 'a') - { - char *p = THAlloc(TBRS_BSZ); - size_t total = TBRS_BSZ; - size_t pos = 0; - - for (;;) - { - if(total-pos == 0) /* we need more space! */ - { - total += TBRS_BSZ; - p = THRealloc(p, total); - } - pos += fread(p+pos, 1, total-pos, dfself->handle); - if (pos < total) /* eof? */ - { - if(pos == 0) - { - THFree(p); - dfself->file.hasError = 1; - if(!dfself->file.isQuiet) - THError("read error: read 0 blocks instead of 1"); - - *str_ = NULL; - return 0; - } - *str_ = p; - return pos; - } - } - } - else - { - char *p = THAlloc(TBRS_BSZ); - size_t total = TBRS_BSZ; - size_t pos = 0; - size_t size; - - for (;;) - { - if(total-pos <= 1) /* we can only write '\0' in there! */ - { - total += TBRS_BSZ; - p = THRealloc(p, total); - } - if (fgets(p+pos, total-pos, dfself->handle) == NULL) /* eof? 
*/ - { - if(pos == 0) - { - THFree(p); - dfself->file.hasError = 1; - if(!dfself->file.isQuiet) - THError("read error: read 0 blocks instead of 1"); - - *str_ = NULL; - return 0; - } - *str_ = p; - return pos; - } - size = strlen(p+pos); - if (size == 0 || (p+pos)[size-1] != '\n') - { - pos += size; - } - else - { - pos += size-1; /* do not include `eol' */ - *str_ = p; - return pos; - } - } - } - - *str_ = NULL; - return 0; -} - - -static size_t THDiskFile_writeString(THFile *self, const char *str, size_t size) -{ - THDiskFile *dfself = (THDiskFile*)(self); - size_t nwrite; - - THArgCheck(dfself->handle != NULL, 1, "attempt to use a closed file"); - THArgCheck(dfself->file.isWritable, 1, "attempt to write in a read-only file"); - - nwrite = fwrite(str, 1, size, dfself->handle); - if(nwrite != size) - { - dfself->file.hasError = 1; - if(!dfself->file.isQuiet) - THError("write error: wrote %zu blocks instead of %zu", nwrite, size); - } - - return nwrite; -} - -THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet) -{ - static struct THFileVTable vtable = { - THDiskFile_isOpened, - - THDiskFile_readByte, - THDiskFile_readChar, - THDiskFile_readShort, - THDiskFile_readInt, - THDiskFile_readLong, - THDiskFile_readFloat, - THDiskFile_readDouble, - THDiskFile_readHalf, - THDiskFile_readString, - - THDiskFile_writeByte, - THDiskFile_writeChar, - THDiskFile_writeShort, - THDiskFile_writeInt, - THDiskFile_writeLong, - THDiskFile_writeFloat, - THDiskFile_writeDouble, - THDiskFile_writeHalf, - THDiskFile_writeString, - - THDiskFile_synchronize, - THDiskFile_seek, - THDiskFile_seekEnd, - THDiskFile_position, - THDiskFile_close, - THDiskFile_free - }; - - int isReadable; - int isWritable; - FILE *handle; - THDiskFile *self; - - THArgCheck(THDiskFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'"); - - if( isReadable && isWritable ) - { - handle = fopen(name, "r+b"); - if(!handle) - { - handle = fopen(name, "wb"); - if(handle) - { 
- fclose(handle); - handle = fopen(name, "r+b"); - } - } - } - else - handle = fopen(name, (isReadable ? "rb" : "wb")); - - if(!handle) - { - if(isQuiet) - return 0; - else - THError("cannot open <%s> in mode %c%c", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' ')); - } - - self = THAlloc(sizeof(THDiskFile)); - - self->handle = handle; - self->name = THAlloc(strlen(name)+1); - strcpy(self->name, name); - self->isNativeEncoding = 1; - self->longSize = 0; - - self->file.vtable = &vtable; - self->file.isQuiet = isQuiet; - self->file.isReadable = isReadable; - self->file.isWritable = isWritable; - self->file.isBinary = 0; - self->file.isAutoSpacing = 1; - self->file.hasError = 0; - - return (THFile*)self; -} - -/* PipeFile */ - -static int THPipeFile_mode(const char *mode, int *isReadable, int *isWritable) -{ - *isReadable = 0; - *isWritable = 0; - if(strlen(mode) == 1) - { - if(*mode == 'r') - { - *isReadable = 1; - return 1; - } - else if(*mode == 'w') - { - *isWritable = 1; - return 1; - } - } - return 0; -} - -static void THPipeFile_free(THFile *self) -{ - THDiskFile *dfself = (THDiskFile*)(self); - if(dfself->handle) - pclose(dfself->handle); - THFree(dfself->name); - THFree(dfself); -} - -THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet) -{ - static struct THFileVTable vtable = { - THDiskFile_isOpened, - - THDiskFile_readByte, - THDiskFile_readChar, - THDiskFile_readShort, - THDiskFile_readInt, - THDiskFile_readLong, - THDiskFile_readFloat, - THDiskFile_readDouble, - THDiskFile_readHalf, - THDiskFile_readString, - - THDiskFile_writeByte, - THDiskFile_writeChar, - THDiskFile_writeShort, - THDiskFile_writeInt, - THDiskFile_writeLong, - THDiskFile_writeFloat, - THDiskFile_writeDouble, - THDiskFile_writeHalf, - THDiskFile_writeString, - - THDiskFile_synchronize, - THDiskFile_seek, - THDiskFile_seekEnd, - THDiskFile_position, - THDiskFile_close, - THPipeFile_free - }; - - int isReadable; - int isWritable; - FILE *handle; - THDiskFile 
*self; - - THArgCheck(THPipeFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w'"); - -#ifdef _WIN32 - handle = _popen(name, (isReadable ? "rb" : "wb")); -#else - handle = popen(name, (isReadable ? "r" : "w")); -#endif - - if(!handle) - { - if(isQuiet) - return 0; - else - THError("cannot open <%s> in mode %c%c. This might be because eg the executable doesn't exist, but it could also be because you are out of memory.", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' ')); - } - - self = THAlloc(sizeof(THDiskFile)); - - self->handle = handle; - self->name = THAlloc(strlen(name)+1); - strcpy(self->name, name); - self->isNativeEncoding = 1; - self->longSize = 0; - - self->file.vtable = &vtable; - self->file.isQuiet = isQuiet; - self->file.isReadable = isReadable; - self->file.isWritable = isWritable; - self->file.isBinary = 0; - self->file.isAutoSpacing = 1; - self->file.hasError = 0; - - return (THFile*)self; -} diff --git a/contrib/lua-torch/torch7/lib/TH/THDiskFile.h b/contrib/lua-torch/torch7/lib/TH/THDiskFile.h deleted file mode 100644 index bc5c001c7..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THDiskFile.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef TH_DISK_FILE_INC -#define TH_DISK_FILE_INC - -#include "THFile.h" - -TH_API THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet); -TH_API THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet); - -TH_API const char *THDiskFile_name(THFile *self); - -TH_API int THDiskFile_isLittleEndianCPU(void); -TH_API int THDiskFile_isBigEndianCPU(void); -TH_API void THDiskFile_nativeEndianEncoding(THFile *self); -TH_API void THDiskFile_littleEndianEncoding(THFile *self); -TH_API void THDiskFile_bigEndianEncoding(THFile *self); -TH_API void THDiskFile_longSize(THFile *self, int size); -TH_API void THDiskFile_noBuffer(THFile *self); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THFile.c b/contrib/lua-torch/torch7/lib/TH/THFile.c deleted file mode 100644 index 
3717b7b5c..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THFile.c +++ /dev/null @@ -1,157 +0,0 @@ -#include "THFile.h" -#include "THFilePrivate.h" - -#define IMPLEMENT_THFILE_RW(TYPEC, TYPE) \ - size_t THFile_read##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \ - { \ - return (*self->vtable->read##TYPEC)(self, data, n); \ - } \ - \ - size_t THFile_write##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \ - { \ - return (*self->vtable->write##TYPEC)(self, data, n); \ - } - -IMPLEMENT_THFILE_RW(Byte, unsigned char) -IMPLEMENT_THFILE_RW(Char, char) -IMPLEMENT_THFILE_RW(Short, short) -IMPLEMENT_THFILE_RW(Int, int) -IMPLEMENT_THFILE_RW(Long, long) -IMPLEMENT_THFILE_RW(Float, float) -IMPLEMENT_THFILE_RW(Double, double) -IMPLEMENT_THFILE_RW(Half, THHalf) - -size_t THFile_readStringRaw(THFile *self, const char *format, char **str_) -{ - return self->vtable->readString(self, format, str_); -} - -size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size) -{ - return self->vtable->writeString(self, str, size); -} - -void THFile_synchronize(THFile *self) -{ - self->vtable->synchronize(self); -} - -void THFile_seek(THFile *self, size_t position) -{ - self->vtable->seek(self, position); -} - -void THFile_seekEnd(THFile *self) -{ - self->vtable->seekEnd(self); -} - -size_t THFile_position(THFile *self) -{ - return self->vtable->position(self); -} - -void THFile_close(THFile *self) -{ - self->vtable->close(self); -} - -void THFile_free(THFile *self) -{ - self->vtable->free(self); -} - -int THFile_isOpened(THFile *self) -{ - return self->vtable->isOpened(self); -} - -#define IMPLEMENT_THFILE_FLAGS(FLAG) \ - int THFile_##FLAG(THFile *self) \ - { \ - return self->FLAG; \ - } - -IMPLEMENT_THFILE_FLAGS(isQuiet) -IMPLEMENT_THFILE_FLAGS(isReadable) -IMPLEMENT_THFILE_FLAGS(isWritable) -IMPLEMENT_THFILE_FLAGS(isBinary) -IMPLEMENT_THFILE_FLAGS(isAutoSpacing) -IMPLEMENT_THFILE_FLAGS(hasError) - -void THFile_binary(THFile *self) -{ - self->isBinary = 1; -} - -void 
THFile_ascii(THFile *self) -{ - self->isBinary = 0; -} - -void THFile_autoSpacing(THFile *self) -{ - self->isAutoSpacing = 1; -} - -void THFile_noAutoSpacing(THFile *self) -{ - self->isAutoSpacing = 0; -} - -void THFile_quiet(THFile *self) -{ - self->isQuiet = 1; -} - -void THFile_pedantic(THFile *self) -{ - self->isQuiet = 0; -} - -void THFile_clearError(THFile *self) -{ - self->hasError = 0; -} - -#define IMPLEMENT_THFILE_SCALAR(TYPEC, TYPE) \ - TYPE THFile_read##TYPEC##Scalar(THFile *self) \ - { \ - TYPE scalar; \ - THFile_read##TYPEC##Raw(self, &scalar, 1); \ - return scalar; \ - } \ - \ - void THFile_write##TYPEC##Scalar(THFile *self, TYPE scalar) \ - { \ - THFile_write##TYPEC##Raw(self, &scalar, 1); \ - } - -IMPLEMENT_THFILE_SCALAR(Byte, unsigned char) -IMPLEMENT_THFILE_SCALAR(Char, char) -IMPLEMENT_THFILE_SCALAR(Short, short) -IMPLEMENT_THFILE_SCALAR(Int, int) -IMPLEMENT_THFILE_SCALAR(Long, long) -IMPLEMENT_THFILE_SCALAR(Float, float) -IMPLEMENT_THFILE_SCALAR(Double, double) -IMPLEMENT_THFILE_SCALAR(Half, THHalf) - -#define IMPLEMENT_THFILE_STORAGE(TYPEC, TYPE) \ - size_t THFile_read##TYPEC(THFile *self, TH##TYPEC##Storage *storage) \ - { \ - return THFile_read##TYPEC##Raw(self, storage->data, storage->size); \ - } \ - \ - size_t THFile_write##TYPEC(THFile *self, TH##TYPEC##Storage *storage) \ - { \ - return THFile_write##TYPEC##Raw(self, storage->data, storage->size); \ - } - -IMPLEMENT_THFILE_STORAGE(Byte, unsigned char) -IMPLEMENT_THFILE_STORAGE(Char, char) -IMPLEMENT_THFILE_STORAGE(Short, short) -IMPLEMENT_THFILE_STORAGE(Int, int) -IMPLEMENT_THFILE_STORAGE(Long, long) -IMPLEMENT_THFILE_STORAGE(Float, float) -IMPLEMENT_THFILE_STORAGE(Double, double) -IMPLEMENT_THFILE_STORAGE(Half, THHalf) diff --git a/contrib/lua-torch/torch7/lib/TH/THFile.h b/contrib/lua-torch/torch7/lib/TH/THFile.h deleted file mode 100644 index e097bdf34..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THFile.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef TH_FILE_INC -#define TH_FILE_INC - 
-#include "THStorage.h" - -typedef struct THFile__ THFile; - -TH_API int THFile_isOpened(THFile *self); -TH_API int THFile_isQuiet(THFile *self); -TH_API int THFile_isReadable(THFile *self); -TH_API int THFile_isWritable(THFile *self); -TH_API int THFile_isBinary(THFile *self); -TH_API int THFile_isAutoSpacing(THFile *self); -TH_API int THFile_hasError(THFile *self); - -TH_API void THFile_binary(THFile *self); -TH_API void THFile_ascii(THFile *self); -TH_API void THFile_autoSpacing(THFile *self); -TH_API void THFile_noAutoSpacing(THFile *self); -TH_API void THFile_quiet(THFile *self); -TH_API void THFile_pedantic(THFile *self); -TH_API void THFile_clearError(THFile *self); - -/* scalar */ -TH_API unsigned char THFile_readByteScalar(THFile *self); -TH_API char THFile_readCharScalar(THFile *self); -TH_API short THFile_readShortScalar(THFile *self); -TH_API int THFile_readIntScalar(THFile *self); -TH_API long THFile_readLongScalar(THFile *self); -TH_API float THFile_readFloatScalar(THFile *self); -TH_API double THFile_readDoubleScalar(THFile *self); - -TH_API void THFile_writeByteScalar(THFile *self, unsigned char scalar); -TH_API void THFile_writeCharScalar(THFile *self, char scalar); -TH_API void THFile_writeShortScalar(THFile *self, short scalar); -TH_API void THFile_writeIntScalar(THFile *self, int scalar); -TH_API void THFile_writeLongScalar(THFile *self, long scalar); -TH_API void THFile_writeFloatScalar(THFile *self, float scalar); -TH_API void THFile_writeDoubleScalar(THFile *self, double scalar); - -/* storage */ -TH_API size_t THFile_readByte(THFile *self, THByteStorage *storage); -TH_API size_t THFile_readChar(THFile *self, THCharStorage *storage); -TH_API size_t THFile_readShort(THFile *self, THShortStorage *storage); -TH_API size_t THFile_readInt(THFile *self, THIntStorage *storage); -TH_API size_t THFile_readLong(THFile *self, THLongStorage *storage); -TH_API size_t THFile_readFloat(THFile *self, THFloatStorage *storage); -TH_API size_t 
THFile_readDouble(THFile *self, THDoubleStorage *storage); - -TH_API size_t THFile_writeByte(THFile *self, THByteStorage *storage); -TH_API size_t THFile_writeChar(THFile *self, THCharStorage *storage); -TH_API size_t THFile_writeShort(THFile *self, THShortStorage *storage); -TH_API size_t THFile_writeInt(THFile *self, THIntStorage *storage); -TH_API size_t THFile_writeLong(THFile *self, THLongStorage *storage); -TH_API size_t THFile_writeFloat(THFile *self, THFloatStorage *storage); -TH_API size_t THFile_writeDouble(THFile *self, THDoubleStorage *storage); - -/* raw */ -TH_API size_t THFile_readByteRaw(THFile *self, unsigned char *data, size_t n); -TH_API size_t THFile_readCharRaw(THFile *self, char *data, size_t n); -TH_API size_t THFile_readShortRaw(THFile *self, short *data, size_t n); -TH_API size_t THFile_readIntRaw(THFile *self, int *data, size_t n); -TH_API size_t THFile_readLongRaw(THFile *self, long *data, size_t n); -TH_API size_t THFile_readFloatRaw(THFile *self, float *data, size_t n); -TH_API size_t THFile_readDoubleRaw(THFile *self, double *data, size_t n); -TH_API size_t THFile_readStringRaw(THFile *self, const char *format, char **str_); /* you must deallocate str_ */ - -TH_API size_t THFile_writeByteRaw(THFile *self, unsigned char *data, size_t n); -TH_API size_t THFile_writeCharRaw(THFile *self, char *data, size_t n); -TH_API size_t THFile_writeShortRaw(THFile *self, short *data, size_t n); -TH_API size_t THFile_writeIntRaw(THFile *self, int *data, size_t n); -TH_API size_t THFile_writeLongRaw(THFile *self, long *data, size_t n); -TH_API size_t THFile_writeFloatRaw(THFile *self, float *data, size_t n); -TH_API size_t THFile_writeDoubleRaw(THFile *self, double *data, size_t n); -TH_API size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size); - -TH_API THHalf THFile_readHalfScalar(THFile *self); -TH_API void THFile_writeHalfScalar(THFile *self, THHalf scalar); -TH_API size_t THFile_readHalf(THFile *self, THHalfStorage *storage); 
-TH_API size_t THFile_writeHalf(THFile *self, THHalfStorage *storage); -TH_API size_t THFile_readHalfRaw(THFile *self, THHalf* data, size_t size); -TH_API size_t THFile_writeHalfRaw(THFile *self, THHalf* data, size_t size); - -TH_API void THFile_synchronize(THFile *self); -TH_API void THFile_seek(THFile *self, size_t position); -TH_API void THFile_seekEnd(THFile *self); -TH_API size_t THFile_position(THFile *self); -TH_API void THFile_close(THFile *self); -TH_API void THFile_free(THFile *self); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THFilePrivate.h b/contrib/lua-torch/torch7/lib/TH/THFilePrivate.h deleted file mode 100644 index 55169c3bc..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THFilePrivate.h +++ /dev/null @@ -1,50 +0,0 @@ -#include "THGeneral.h" - -#include "THHalf.h" - - -struct THFile__ -{ - struct THFileVTable *vtable; - - int isQuiet; - int isReadable; - int isWritable; - int isBinary; - int isAutoSpacing; - int hasError; -}; - -/* virtual table definition */ - -struct THFileVTable -{ - int (*isOpened)(THFile *self); - - size_t (*readByte)(THFile *self, unsigned char *data, size_t n); - size_t (*readChar)(THFile *self, char *data, size_t n); - size_t (*readShort)(THFile *self, short *data, size_t n); - size_t (*readInt)(THFile *self, int *data, size_t n); - size_t (*readLong)(THFile *self, long *data, size_t n); - size_t (*readFloat)(THFile *self, float *data, size_t n); - size_t (*readDouble)(THFile *self, double *data, size_t n); - size_t (*readHalf)(THFile *self, THHalf *data, size_t n); - size_t (*readString)(THFile *self, const char *format, char **str_); - - size_t (*writeByte)(THFile *self, unsigned char *data, size_t n); - size_t (*writeChar)(THFile *self, char *data, size_t n); - size_t (*writeShort)(THFile *self, short *data, size_t n); - size_t (*writeInt)(THFile *self, int *data, size_t n); - size_t (*writeLong)(THFile *self, long *data, size_t n); - size_t (*writeFloat)(THFile *self, float *data, size_t n); - size_t 
(*writeDouble)(THFile *self, double *data, size_t n); - size_t (*writeHalf)(THFile *self, THHalf *data, size_t n); - size_t (*writeString)(THFile *self, const char *str, size_t size); - - void (*synchronize)(THFile *self); - void (*seek)(THFile *self, size_t position); - void (*seekEnd)(THFile *self); - size_t (*position)(THFile *self); - void (*close)(THFile *self); - void (*free)(THFile *self); -}; diff --git a/contrib/lua-torch/torch7/lib/TH/THGeneral.c b/contrib/lua-torch/torch7/lib/TH/THGeneral.c deleted file mode 100644 index f093c422f..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGeneral.c +++ /dev/null @@ -1,406 +0,0 @@ -#include "THGeneral.h" -#include "THAtomic.h" - -#ifdef _OPENMP -#include <omp.h> -#endif - -#ifndef TH_HAVE_THREAD -#define __thread -#elif _MSC_VER -#define __thread __declspec( thread ) -#endif - -#if defined(__APPLE__) -#include <malloc/malloc.h> -#endif - -#if defined(__linux__) -#include <malloc.h> -#endif - -#if defined(__FreeBSD__) -#include <malloc_np.h> -#endif - -/* Torch Error Handling */ -static void defaultErrorHandlerFunction(const char *msg, void *data) -{ - printf("$ Error: %s\n", msg); - abort(); -} - -static THErrorHandlerFunction defaultErrorHandler = defaultErrorHandlerFunction; -static void *defaultErrorHandlerData; -static __thread THErrorHandlerFunction threadErrorHandler = NULL; -static __thread void *threadErrorHandlerData; - -void _THError(const char *file, const int line, const char *fmt, ...) -{ - char msg[2048]; - va_list args; - - /* vasprintf not standard */ - /* vsnprintf: how to handle if does not exists? 
*/ - va_start(args, fmt); - int n = vsnprintf(msg, 2048, fmt, args); - va_end(args); - - if(n < 2048) { - snprintf(msg + n, 2048 - n, " at %s:%d", file, line); - } - - if (threadErrorHandler) - (*threadErrorHandler)(msg, threadErrorHandlerData); - else - (*defaultErrorHandler)(msg, defaultErrorHandlerData); -} - -void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...) { - char msg[1024]; - va_list args; - va_start(args, fmt); - vsnprintf(msg, 1024, fmt, args); - va_end(args); - _THError(file, line, "Assertion `%s' failed. %s", exp, msg); -} - -void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data) -{ - threadErrorHandler = new_handler; - threadErrorHandlerData = data; -} - -void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data) -{ - if (new_handler) - defaultErrorHandler = new_handler; - else - defaultErrorHandler = defaultErrorHandlerFunction; - defaultErrorHandlerData = data; -} - -/* Torch Arg Checking Handling */ -static void defaultArgErrorHandlerFunction(int argNumber, const char *msg, void *data) -{ - if(msg) - printf("$ Invalid argument %d: %s\n", argNumber, msg); - else - printf("$ Invalid argument %d\n", argNumber); - exit(-1); -} - -static THArgErrorHandlerFunction defaultArgErrorHandler = defaultArgErrorHandlerFunction; -static void *defaultArgErrorHandlerData; -static __thread THArgErrorHandlerFunction threadArgErrorHandler = NULL; -static __thread void *threadArgErrorHandlerData; - -void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...) -{ - if(!condition) { - char msg[2048]; - va_list args; - - /* vasprintf not standard */ - /* vsnprintf: how to handle if does not exists? 
*/ - va_start(args, fmt); - int n = vsnprintf(msg, 2048, fmt, args); - va_end(args); - - if(n < 2048) { - snprintf(msg + n, 2048 - n, " at %s:%d", file, line); - } - - if (threadArgErrorHandler) - (*threadArgErrorHandler)(argNumber, msg, threadArgErrorHandlerData); - else - (*defaultArgErrorHandler)(argNumber, msg, defaultArgErrorHandlerData); - } -} - -void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data) -{ - threadArgErrorHandler = new_handler; - threadArgErrorHandlerData = data; -} - -void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data) -{ - if (new_handler) - defaultArgErrorHandler = new_handler; - else - defaultArgErrorHandler = defaultArgErrorHandlerFunction; - defaultArgErrorHandlerData = data; -} - -static __thread void (*torchGCFunction)(void *data) = NULL; -static __thread void *torchGCData; -static ptrdiff_t heapSize = 0; -static __thread ptrdiff_t heapDelta = 0; -static const ptrdiff_t heapMaxDelta = (ptrdiff_t)1e6; // limit to +/- 1MB before updating heapSize -static const ptrdiff_t heapMinDelta = (ptrdiff_t)-1e6; -static __thread ptrdiff_t heapSoftmax = (ptrdiff_t)3e8; // 300MB, adjusted upward dynamically -static const double heapSoftmaxGrowthThresh = 0.8; // grow softmax if >80% max after GC -static const double heapSoftmaxGrowthFactor = 1.4; // grow softmax by 40% - -/* Optional hook for integrating with a garbage-collected frontend. - * - * If torch is running with a garbage-collected frontend (e.g. Lua), - * the GC isn't aware of TH-allocated memory so may not know when it - * needs to run. These hooks trigger the GC to run in two cases: - * - * (1) When a memory allocation (malloc, realloc, ...) fails - * (2) When the total TH-allocated memory hits a dynamically-adjusted - * soft maximum. 
- */ -void THSetGCHandler( void (*torchGCFunction_)(void *data), void *data ) -{ - torchGCFunction = torchGCFunction_; - torchGCData = data; -} - -/* it is guaranteed the allocated size is not bigger than PTRDIFF_MAX */ -static ptrdiff_t getAllocSize(void *ptr) { -#if defined(__unix) && defined(HAVE_MALLOC_USABLE_SIZE) - return malloc_usable_size(ptr); -#elif defined(__APPLE__) - return malloc_size(ptr); -#elif defined(_WIN32) - if(ptr) { return _msize(ptr); } else { return 0; } -#else - return 0; -#endif -} - -static ptrdiff_t applyHeapDelta() { - ptrdiff_t oldHeapSize = THAtomicAddPtrdiff(&heapSize, heapDelta); -#ifdef DEBUG - if (heapDelta > 0 && oldHeapSize > PTRDIFF_MAX - heapDelta) - THError("applyHeapDelta: heapSize(%td) + increased(%td) > PTRDIFF_MAX, heapSize overflow!", oldHeapSize, heapDelta); - if (heapDelta < 0 && oldHeapSize < PTRDIFF_MIN - heapDelta) - THError("applyHeapDelta: heapSize(%td) + decreased(%td) < PTRDIFF_MIN, heapSize underflow!", oldHeapSize, heapDelta); -#endif - ptrdiff_t newHeapSize = oldHeapSize + heapDelta; - heapDelta = 0; - return newHeapSize; -} - -/* (1) if the torch-allocated heap size exceeds the soft max, run GC - * (2) if post-GC heap size exceeds 80% of the soft max, increase the - * soft max by 40% - */ -static void maybeTriggerGC(ptrdiff_t curHeapSize) { - if (torchGCFunction && curHeapSize > heapSoftmax) { - torchGCFunction(torchGCData); - - // ensure heapSize is accurate before updating heapSoftmax - ptrdiff_t newHeapSize = applyHeapDelta(); - - if (newHeapSize > heapSoftmax * heapSoftmaxGrowthThresh) { - heapSoftmax = (ptrdiff_t)(heapSoftmax * heapSoftmaxGrowthFactor); - } - } -} - -// hooks into the TH heap tracking -void THHeapUpdate(ptrdiff_t size) { -#ifdef DEBUG - if (size > 0 && heapDelta > PTRDIFF_MAX - size) - THError("THHeapUpdate: heapDelta(%td) + increased(%td) > PTRDIFF_MAX, heapDelta overflow!", heapDelta, size); - if (size < 0 && heapDelta < PTRDIFF_MIN - size) - THError("THHeapUpdate: heapDelta(%td) + 
decreased(%td) < PTRDIFF_MIN, heapDelta underflow!", heapDelta, size); -#endif - - heapDelta += size; - - // batch updates to global heapSize to minimize thread contention - if (heapDelta < heapMaxDelta && heapDelta > heapMinDelta) { - return; - } - - ptrdiff_t newHeapSize = applyHeapDelta(); - - if (size > 0) { - maybeTriggerGC(newHeapSize); - } -} - -static void* THAllocInternal(ptrdiff_t size) -{ - void *ptr; - - if (size > 5120) - { -#if (defined(__unix) || defined(__APPLE__)) && (!defined(DISABLE_POSIX_MEMALIGN)) - if (posix_memalign(&ptr, 64, size) != 0) - ptr = NULL; -/* -#elif defined(_WIN32) - ptr = _aligned_malloc(size, 64); -*/ -#else - ptr = malloc(size); -#endif - } - else - { - ptr = malloc(size); - } - - THHeapUpdate(getAllocSize(ptr)); - return ptr; -} - -void* THAlloc(ptrdiff_t size) -{ - void *ptr; - - if(size < 0) - THError("$ Torch: invalid memory size -- maybe an overflow?"); - - if(size == 0) - return NULL; - - ptr = THAllocInternal(size); - - if(!ptr && torchGCFunction) { - torchGCFunction(torchGCData); - ptr = THAllocInternal(size); - } - - if(!ptr) - THError("$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", size/1073741824); - - return ptr; -} - -void* THRealloc(void *ptr, ptrdiff_t size) -{ - if(!ptr) - return(THAlloc(size)); - - if(size == 0) - { - THFree(ptr); - return NULL; - } - - if(size < 0) - THError("$ Torch: invalid memory size -- maybe an overflow?"); - - ptrdiff_t oldSize = -getAllocSize(ptr); - void *newptr = realloc(ptr, size); - - if(!newptr && torchGCFunction) { - torchGCFunction(torchGCData); - newptr = realloc(ptr, size); - } - - if(!newptr) - THError("$ Torch: not enough memory: you tried to reallocate %dGB. 
Buy new RAM!", size/1073741824); - - // update heapSize only after successfully reallocated - THHeapUpdate(oldSize + getAllocSize(newptr)); - - return newptr; -} - -void THFree(void *ptr) -{ - THHeapUpdate(-getAllocSize(ptr)); - free(ptr); -} - -double THLog1p(const double x) -{ -#if (defined(_MSC_VER) || defined(__MINGW32__)) - volatile double y = 1 + x; - return log(y) - ((y-1)-x)/y ; /* cancels errors with IEEE arithmetic */ -#else - return log1p(x); -#endif -} - -void THSetNumThreads(int num_threads) -{ -#ifdef _OPENMP - omp_set_num_threads(num_threads); -#endif -#ifdef TH_BLAS_OPEN - extern void openblas_set_num_threads(int); - openblas_set_num_threads(num_threads); -#endif -#ifdef TH_BLAS_MKL - extern void mkl_set_num_threads(int); - mkl_set_num_threads(num_threads); - -#endif -} - -int THGetNumThreads(void) -{ - int nthreads = 1; -#ifdef _OPENMP - nthreads = omp_get_max_threads(); -#endif -#ifdef TH_BLAS_OPEN - int bl_threads = 1; - extern int openblas_get_num_threads(void); - bl_threads = openblas_get_num_threads(); - nthreads = nthreads > bl_threads ? bl_threads : nthreads; -#endif -#ifdef TH_BLAS_MKL - int bl_threads = 1; - extern int mkl_get_max_threads(void); - bl_threads = mkl_get_max_threads(); - nthreads = nthreads > bl_threads ? bl_threads : nthreads; -#endif - return nthreads; -} - -int THGetNumCores(void) -{ -#ifdef _OPENMP - return omp_get_num_procs(); -#else - return 1; -#endif -} - -#ifdef TH_BLAS_MKL -extern int mkl_get_max_threads(void); -#endif - -TH_API void THInferNumThreads(void) -{ -#if defined(_OPENMP) && defined(TH_BLAS_MKL) - // If we are using MKL an OpenMP make sure the number of threads match. 
- // Otherwise, MKL and our OpenMP-enabled functions will keep changing the - // size of the OpenMP thread pool, resulting in worse performance (and memory - // leaks in GCC 5.4) - omp_set_num_threads(mkl_get_max_threads()); -#endif -} - -TH_API THDescBuff _THSizeDesc(const long *size, const long ndim) { - const int L = TH_DESC_BUFF_LEN; - THDescBuff buf; - char *str = buf.str; - int n = 0; - n += snprintf(str, L-n, "["); - int i; - for(i = 0; i < ndim; i++) { - if(n >= L) break; - n += snprintf(str+n, L-n, "%ld", size[i]); - if(i < ndim-1) { - n += snprintf(str+n, L-n, " x "); - } - } - if(n < L - 2) { - snprintf(str+n, L-n, "]"); - } else { - snprintf(str+L-5, 5, "...]"); - } - return buf; -} - diff --git a/contrib/lua-torch/torch7/lib/TH/THGeneral.h.in b/contrib/lua-torch/torch7/lib/TH/THGeneral.h.in deleted file mode 100644 index 88a3934c8..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGeneral.h.in +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef TH_GENERAL_INC -#define TH_GENERAL_INC - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <math.h> -#include <limits.h> -#include <float.h> -#include <time.h> -#include <string.h> -#include <stddef.h> - -#cmakedefine USE_BLAS -#cmakedefine USE_LAPACK -#cmakedefine BLAS_F2C - -#ifdef __cplusplus -# define TH_EXTERNC extern "C" -#else -# define TH_EXTERNC extern -#endif - -#ifdef _WIN32 -# ifdef TH_EXPORTS -# define TH_API TH_EXTERNC __declspec(dllexport) -# else -# define TH_API TH_EXTERNC __declspec(dllimport) -# endif -#else -# define TH_API TH_EXTERNC -#endif - -#ifndef M_PI -# define M_PI 3.14159265358979323846 -#endif - -#ifndef TH_INDEX_BASE -#define TH_INDEX_BASE 1 -#endif - -typedef void (*THErrorHandlerFunction)(const char *msg, void *data); -typedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data); - -#define TH_DESC_BUFF_LEN 64 -typedef struct { - char str[TH_DESC_BUFF_LEN]; -} THDescBuff; - - -TH_API double THLog1p(const double x); -TH_API THDescBuff 
_THSizeDesc(const long *size, const long ndim); -TH_API void _THError(const char *file, const int line, const char *fmt, ...); -TH_API void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...); -TH_API void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data); -TH_API void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data); -TH_API void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...); -TH_API void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data); -TH_API void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data); -TH_API void* THAlloc(ptrdiff_t size); -TH_API void* THRealloc(void *ptr, ptrdiff_t size); -TH_API void THFree(void *ptr); -TH_API void THSetGCHandler( void (*torchGCHandlerFunction)(void *data), void *data ); -// this hook should only be called by custom allocator functions -TH_API void THHeapUpdate(ptrdiff_t size); -TH_API void THSetNumThreads(int num_threads); -TH_API int THGetNumThreads(void); -TH_API int THGetNumCores(void); -TH_API void THInferNumThreads(void); - -#define THError(...) _THError(__FILE__, __LINE__, __VA_ARGS__) - -#define THCleanup(...) __VA_ARGS__ - -#define THArgCheck(...) \ -do { \ - _THArgCheck(__FILE__, __LINE__, __VA_ARGS__); \ -} while(0) - -#define THArgCheckWithCleanup(condition, cleanup, ...) \ -do if (!(condition)) { \ - cleanup \ - _THArgCheck(__FILE__, __LINE__, 0, __VA_ARGS__); \ -} while(0) - -#define THAssert(exp) \ -do { \ - if (!(exp)) { \ - _THAssertionFailed(__FILE__, __LINE__, #exp, ""); \ - } \ -} while(0) - -#define THAssertMsg(exp, ...) 
\ -do { \ - if (!(exp)) { \ - _THAssertionFailed(__FILE__, __LINE__, #exp, __VA_ARGS__); \ - } \ -} while(0) - -#define TH_CONCAT_STRING_2(x,y) TH_CONCAT_STRING_2_EXPAND(x,y) -#define TH_CONCAT_STRING_2_EXPAND(x,y) #x #y - -#define TH_CONCAT_STRING_3(x,y,z) TH_CONCAT_STRING_3_EXPAND(x,y,z) -#define TH_CONCAT_STRING_3_EXPAND(x,y,z) #x #y #z - -#define TH_CONCAT_STRING_4(x,y,z,w) TH_CONCAT_STRING_4_EXPAND(x,y,z,w) -#define TH_CONCAT_STRING_4_EXPAND(x,y,z,w) #x #y #z #w - -#define TH_CONCAT_2(x,y) TH_CONCAT_2_EXPAND(x,y) -#define TH_CONCAT_2_EXPAND(x,y) x ## y - -#define TH_CONCAT_3(x,y,z) TH_CONCAT_3_EXPAND(x,y,z) -#define TH_CONCAT_3_EXPAND(x,y,z) x ## y ## z - -#define TH_CONCAT_4_EXPAND(x,y,z,w) x ## y ## z ## w -#define TH_CONCAT_4(x,y,z,w) TH_CONCAT_4_EXPAND(x,y,z,w) - -#define THMin(X, Y) ((X) < (Y) ? (X) : (Y)) -#define THMax(X, Y) ((X) > (Y) ? (X) : (Y)) - -#if (defined(_MSC_VER) || defined(__MINGW32__)) -# define log1p(x) THLog1p(x) -#define snprintf _snprintf -#define popen _popen -#define pclose _pclose -#include <BaseTsd.h> -typedef SSIZE_T ssize_t; -#endif - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h b/contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h deleted file mode 100644 index 5b9508df7..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateAllTypes.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateAllTypes.h" -#endif - -#ifndef THGenerateManyTypes -#define THAllLocalGenerateManyTypes -#define THGenerateManyTypes -#endif - -#include "THGenerateFloatTypes.h" -#include "THGenerateIntTypes.h" - -#ifdef THAllLocalGenerateManyTypes -#undef THAllLocalGenerateManyTypes -#undef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h deleted file mode 100644 index 71ce7c405..000000000 --- 
a/contrib/lua-torch/torch7/lib/TH/THGenerateByteType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateByteType.h" -#endif - -#define real unsigned char -#define accreal long -#define Real Byte -#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) -#define THInf UCHAR_MAX -#define TH_REAL_IS_BYTE -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef real -#undef accreal -#undef Real -#undef THInf -#undef TH_REAL_IS_BYTE -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h deleted file mode 100644 index 158dd0e80..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateCharType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateCharType.h" -#endif - -#define real char -#define accreal long -#define Real Char -#define THInf CHAR_MAX -#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) -#define TH_REAL_IS_CHAR -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef real -#undef accreal -#undef Real -#undef THInf -#undef TH_REAL_IS_CHAR -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h deleted file mode 100644 index fffee606d..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateDoubleType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateDoubleType.h" -#endif - -#define real double -#define accreal double 
-#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) -#define Real Double -#define THInf DBL_MAX -#define TH_REAL_IS_DOUBLE -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef accreal -#undef real -#undef Real -#undef THInf -#undef TH_REAL_IS_DOUBLE -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h deleted file mode 100644 index a31b50c55..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateFloatType.h" -#endif - -#define real float -#define accreal double -#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) -#define Real Float -#define THInf FLT_MAX -#define TH_REAL_IS_FLOAT -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef accreal -#undef real -#undef Real -#undef THInf -#undef TH_REAL_IS_FLOAT -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h b/contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h deleted file mode 100644 index be5ea8403..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateFloatTypes.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h" -#endif - -#ifndef THGenerateManyTypes -#define THFloatLocalGenerateManyTypes -#define THGenerateManyTypes -#endif - -#include "THGenerateFloatType.h" -#include "THGenerateDoubleType.h" - -#ifdef THFloatLocalGenerateManyTypes -#undef THFloatLocalGenerateManyTypes -#undef THGenerateManyTypes -#undef 
TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h deleted file mode 100644 index 47ff1e8d7..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateHalfType.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateHalfType.h" -#endif - -#include "THHalf.h" -#define real THHalf -#define accreal float -#define TH_CONVERT_REAL_TO_ACCREAL(_val) TH_half2float(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) TH_float2half(_val) -#define Real Half -#define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF) -#define TH_REAL_IS_HALF -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef real -#undef accreal -#undef Real -#undef THInf -#undef TH_REAL_IS_HALF -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h deleted file mode 100644 index 1562b9e98..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateIntType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateIntType.h" -#endif - -#define real int -#define accreal long -#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) -#define Real Int -#define THInf INT_MAX -#define TH_REAL_IS_INT -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef real -#undef accreal -#undef Real -#undef THInf -#undef TH_REAL_IS_INT -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h b/contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h deleted file mode 100644 index 9931fb1f5..000000000 --- 
a/contrib/lua-torch/torch7/lib/TH/THGenerateIntTypes.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateIntTypes.h" -#endif - -#ifndef THGenerateManyTypes -#define THIntLocalGenerateManyTypes -#define THGenerateManyTypes -#endif - -#include "THGenerateByteType.h" -#include "THGenerateCharType.h" -#include "THGenerateShortType.h" -#include "THGenerateIntType.h" -#include "THGenerateLongType.h" - -#ifdef THIntLocalGenerateManyTypes -#undef THIntLocalGenerateManyTypes -#undef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h deleted file mode 100644 index 75f90e1a6..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateLongType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateLongType.h" -#endif - -#define real long -#define accreal long -#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val) -#define Real Long -#define THInf LONG_MAX -#define TH_REAL_IS_LONG -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef real -#undef accreal -#undef Real -#undef THInf -#undef TH_REAL_IS_LONG -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h b/contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h deleted file mode 100644 index 047e51a8d..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THGenerateShortType.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef TH_GENERIC_FILE -#error "You must define TH_GENERIC_FILE before including THGenerateShortType.h" -#endif - -#define real short -#define accreal long -#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val) -#define TH_CONVERT_ACCREAL_TO_REAL(_val) 
(real)(_val) -#define Real Short -#define THInf SHRT_MAX -#define TH_REAL_IS_SHORT -#line 1 TH_GENERIC_FILE -#include TH_GENERIC_FILE -#undef real -#undef accreal -#undef Real -#undef THInf -#undef TH_REAL_IS_SHORT -#undef TH_CONVERT_REAL_TO_ACCREAL -#undef TH_CONVERT_ACCREAL_TO_REAL - -#ifndef THGenerateManyTypes -#undef TH_GENERIC_FILE -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THHalf.c b/contrib/lua-torch/torch7/lib/TH/THHalf.c deleted file mode 100644 index d7468ac3d..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THHalf.c +++ /dev/null @@ -1,100 +0,0 @@ -#include "THHalf.h" - -/* Copyright 1993-2014 NVIDIA Corporation. All rights reserved. */ - -THHalf TH_float2half(float f) -{ - THHalf h; - TH_float2halfbits(&f, &h.x); - return h; -} - -TH_API float TH_half2float(THHalf h) -{ - float f; - TH_halfbits2float(&h.x, &f); - return f; -} - -// Host functions for converting between FP32 and FP16 formats - -void TH_halfbits2float(unsigned short* src, float* res) -{ - unsigned h = *src; - unsigned sign = ((h >> 15) & 1); - unsigned exponent = ((h >> 10) & 0x1f); - unsigned mantissa = ((h & 0x3ff) << 13); - - if (exponent == 0x1f) { /* NaN or Inf */ - mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0); - exponent = 0xff; - } else if (!exponent) { /* Denorm or Zero */ - if (mantissa) { - unsigned int msb; - exponent = 0x71; - do { - msb = (mantissa & 0x400000); - mantissa <<= 1; /* normalize */ - --exponent; - } while (!msb); - mantissa &= 0x7fffff; /* 1.mantissa is implicit */ - } - } else { - exponent += 0x70; - } - - *(unsigned*)res = ((sign << 31) | (exponent << 23) | mantissa); -} - -void TH_float2halfbits(float* src, unsigned short* dest) -{ - unsigned x = *(unsigned*)src; - unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1; - unsigned sign, exponent, mantissa; - - // Get rid of +NaN/-NaN case first. - if (u > 0x7f800000) { - *dest = 0x7fffU; - return ; - } - - sign = ((x >> 16) & 0x8000); - - // Get rid of +Inf/-Inf, +0/-0. 
- if (u > 0x477fefff) { - *dest = sign | 0x7c00U; - return; - } - if (u < 0x33000001) { - *dest = (sign | 0x0000); - return; - } - - exponent = ((u >> 23) & 0xff); - mantissa = (u & 0x7fffff); - - if (exponent > 0x70) { - shift = 13; - exponent -= 0x70; - } else { - shift = 0x7e - exponent; - exponent = 0; - mantissa |= 0x800000; - } - lsb = (1 << shift); - lsb_s1 = (lsb >> 1); - lsb_m1 = (lsb - 1); - - // Round to nearest even. - remainder = (mantissa & lsb_m1); - mantissa >>= shift; - if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) { - ++mantissa; - if (!(mantissa & 0x3ff)) { - ++exponent; - mantissa = 0; - } - } - - *dest = (sign | (exponent << 10) | mantissa); -} diff --git a/contrib/lua-torch/torch7/lib/TH/THHalf.h b/contrib/lua-torch/torch7/lib/TH/THHalf.h deleted file mode 100644 index 0f9807b50..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THHalf.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef TH_HALF_H -#define TH_HALF_H - -#include "THGeneral.h" -#include <stdint.h> - -/* Neither built-in nor included from Cutorch, use our definition lifted from CUDA */ -#if defined(__GNUC__) -#define __thalign__(n) __attribute__((aligned(n))) -#elif defined(_WIN32) -#define __thalign__(n) __declspec(align(n)) -#else -#define __thalign__(n) -#endif - -typedef struct __thalign__(2){ - unsigned short x; -} __THHalf; - -typedef struct __thalign__(4) { - unsigned int x; -} __THHalf2; - -typedef __THHalf THHalf; -typedef __THHalf2 THHalf2; - -TH_API void TH_float2halfbits(float*, unsigned short*); -TH_API void TH_halfbits2float(unsigned short*, float*); - -TH_API THHalf TH_float2half(float); -TH_API float TH_half2float(THHalf); - -#ifndef TH_HALF_BITS_TO_LITERAL -# define TH_HALF_BITS_TO_LITERAL(n) { n } -#endif - -#define TH_HALF_ZERO 0x0U -#define TH_HALF_INF 0x7C00U - -#undef __thalign__ -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THLapack.c b/contrib/lua-torch/torch7/lib/TH/THLapack.c deleted file mode 100644 index bd4dc716b..000000000 --- 
a/contrib/lua-torch/torch7/lib/TH/THLapack.c +++ /dev/null @@ -1,4 +0,0 @@ -#include "THLapack.h" - -#include "generic/THLapack.c" -#include "THGenerateFloatTypes.h" diff --git a/contrib/lua-torch/torch7/lib/TH/THLapack.h b/contrib/lua-torch/torch7/lib/TH/THLapack.h deleted file mode 100644 index 614d15f94..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THLapack.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TH_LAPACK_INC -#define TH_LAPACK_INC - -#include "THGeneral.h" - -#define THLapack_(NAME) TH_CONCAT_4(TH,Real,Lapack_,NAME) - -#define THLapackCheck(fmt, func, info , ...) \ -if (info < 0) { \ - THError("Lapack Error in %s : Illegal Argument %d", func, -info); \ -} else if(info > 0) { \ - THError(fmt, func, info, ##__VA_ARGS__); \ -} \ - -#define THLapackCheckWithCleanup(fmt, cleanup, func, info , ...) \ -if (info < 0) { \ - cleanup \ - THError("Lapack Error in %s : Illegal Argument %d", func, -info); \ -} else if(info > 0) { \ - cleanup \ - THError(fmt, func, info, ##__VA_ARGS__); \ -} - -#include "generic/THLapack.h" -#include "THGenerateAllTypes.h" - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THLogAdd.c b/contrib/lua-torch/torch7/lib/TH/THLogAdd.c deleted file mode 100644 index 4b14f8540..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THLogAdd.c +++ /dev/null @@ -1,88 +0,0 @@ -#include "THLogAdd.h" - -#include <float.h> - -#ifdef USE_DOUBLE -#define MINUS_LOG_THRESHOLD -39.14 -#else -#define MINUS_LOG_THRESHOLD -18.42 -#endif - -const double THLog2Pi=1.83787706640934548355; -const double THLogZero=-DBL_MAX; -const double THLogOne=0; - -double THLogAdd(double log_a, double log_b) -{ - double minusdif; - - if (log_a < log_b) - { - double tmp = log_a; - log_a = log_b; - log_b = tmp; - } - - minusdif = log_b - log_a; -#ifdef DEBUG - if (isnan(minusdif)) - THError("THLogAdd: minusdif (%f) log_b (%f) or log_a (%f) is nan", minusdif, log_b, log_a); -#endif - if (minusdif < MINUS_LOG_THRESHOLD) - return log_a; - else - return log_a + log1p(exp(minusdif)); -} 
- -double THLogSub(double log_a, double log_b) -{ - double minusdif; - - if (log_a < log_b) - THError("LogSub: log_a (%f) should be greater than log_b (%f)", log_a, log_b); - - minusdif = log_b - log_a; -#ifdef DEBUG - if (isnan(minusdif)) - THError("LogSub: minusdif (%f) log_b (%f) or log_a (%f) is nan", minusdif, log_b, log_a); -#endif - if (log_a == log_b) - return THLogZero; - else if (minusdif < MINUS_LOG_THRESHOLD) - return log_a; - else - return log_a + log1p(-exp(minusdif)); -} - -/* Credits to Leon Bottou */ -double THExpMinusApprox(const double x) -{ -#define EXACT_EXPONENTIAL 0 -#if EXACT_EXPONENTIAL - return exp(-x); -#else - /* fast approximation of exp(-x) for x positive */ -# define A0 (1.0) -# define A1 (0.125) -# define A2 (0.0078125) -# define A3 (0.00032552083) -# define A4 (1.0172526e-5) - if (x < 13.0) - { -/* assert(x>=0); */ - double y; - y = A0+x*(A1+x*(A2+x*(A3+x*A4))); - y *= y; - y *= y; - y *= y; - y = 1/y; - return y; - } - return 0; -# undef A0 -# undef A1 -# undef A2 -# undef A3 -# undef A4 -#endif -} diff --git a/contrib/lua-torch/torch7/lib/TH/THLogAdd.h b/contrib/lua-torch/torch7/lib/TH/THLogAdd.h deleted file mode 100644 index 9319b8f46..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THLogAdd.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef TH_LOG_ADD_INC -#define TH_LOG_ADD_INC - -#include "THGeneral.h" - -TH_API const double THLog2Pi; -TH_API const double THLogZero; -TH_API const double THLogOne; - -TH_API double THLogAdd(double log_a, double log_b); -TH_API double THLogSub(double log_a, double log_b); -TH_API double THExpMinusApprox(const double x); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THMath.h b/contrib/lua-torch/torch7/lib/TH/THMath.h deleted file mode 100644 index 004e4fe45..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THMath.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _THMATH_H -#define _THMATH_H - -static inline double TH_sigmoid(double value) { - return 1.0 / (1.0 + exp(-value)); -} - -static inline double 
TH_frac(double x) { - return x - trunc(x); -} - -static inline double TH_rsqrt(double x) { - return 1.0 / sqrt(x); -} - -static inline double TH_lerp(double a, double b, double weight) { - return a + weight * (b-a); -} - -static inline float TH_sigmoidf(float value) { - return 1.0f / (1.0f + expf(-value)); -} - -static inline float TH_fracf(float x) { - return x - truncf(x); -} - -static inline float TH_rsqrtf(float x) { - return 1.0f / sqrtf(x); -} - -static inline float TH_lerpf(float a, float b, float weight) { - return a + weight * (b-a); -} - -#endif // _THMATH_H diff --git a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.c b/contrib/lua-torch/torch7/lib/TH/THMemoryFile.c deleted file mode 100644 index ecce6e1b1..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.c +++ /dev/null @@ -1,685 +0,0 @@ -#include "THMemoryFile.h" -#include "THFilePrivate.h" -#include "stdint.h" - -typedef struct THMemoryFile__ -{ - THFile file; - THCharStorage *storage; - size_t size; - size_t position; - int longSize; - -} THMemoryFile; - -static int THMemoryFile_isOpened(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - return (mfself->storage != NULL); -} - -static char *THMemoryFile_strnextspace(char *str_, char *c_) -{ - char c; - - while( (c = *str_) ) - { - if( (c != ' ') && (c != '\n') && (c != ':') && (c != ';') ) - break; - str_++; - } - - while( (c = *str_) ) - { - if( (c == ' ') || (c == '\n') || (c == ':') || (c == ';') ) - { - *c_ = c; - *str_ = '\0'; - return(str_); - } - str_++; - } - return NULL; -} - -static void THMemoryFile_grow(THMemoryFile *self, size_t size) -{ - size_t missingSpace; - - if(size <= self->size) - return; - else - { - if(size < self->storage->size) /* note the "<" and not "<=" */ - { - self->size = size; - self->storage->data[self->size] = '\0'; - return; - } - } - - missingSpace = size-self->storage->size+1; /* +1 for the '\0' */ - THCharStorage_resize(self->storage, (self->storage->size/2 > missingSpace ? 
- self->storage->size + (self->storage->size/2) - : self->storage->size + missingSpace)); -} - -static int THMemoryFile_mode(const char *mode, int *isReadable, int *isWritable) -{ - *isReadable = 0; - *isWritable = 0; - if(strlen(mode) == 1) - { - if(*mode == 'r') - { - *isReadable = 1; - return 1; - } - else if(*mode == 'w') - { - *isWritable = 1; - return 1; - } - } - else if(strlen(mode) == 2) - { - if(mode[0] == 'r' && mode[1] == 'w') - { - *isReadable = 1; - *isWritable = 1; - return 1; - } - } - return 0; -} - -/********************************************************/ - -#define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM, INSIDE_SPACING) \ - static size_t THMemoryFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \ - { \ - THMemoryFile *mfself = (THMemoryFile*)self; \ - size_t nread = 0; \ - \ - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); \ - THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); \ - \ - if (n == 0) \ - return 0; \ - \ - if(mfself->file.isBinary) \ - { \ - size_t nByte = sizeof(TYPE)*n; \ - size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? 
nByte : mfself->size-mfself->position); \ - nread = nByteRemaining/sizeof(TYPE); \ - memmove(data, mfself->storage->data+mfself->position, nread*sizeof(TYPE)); \ - mfself->position += nread*sizeof(TYPE); \ - } \ - else \ - { \ - size_t i; \ - for(i = 0; i < n; i++) \ - { \ - size_t nByteRead = 0; \ - char spaceChar = 0; \ - char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar); \ - ASCII_READ_ELEM; \ - if(ret == EOF) \ - { \ - while(mfself->storage->data[mfself->position]) \ - mfself->position++; \ - } \ - else \ - mfself->position += nByteRead; \ - if(spacePtr) \ - *spacePtr = spaceChar; \ - } \ - if(mfself->file.isAutoSpacing && (n > 0)) \ - { \ - if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\n') ) \ - mfself->position++; \ - } \ - } \ - \ - if(nread != n) \ - { \ - mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */ \ - if(!mfself->file.isQuiet) \ - THError("read error: read %d blocks instead of %d", nread, n); \ - } \ - \ - return nread; \ - } \ - \ - static size_t THMemoryFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \ - { \ - THMemoryFile *mfself = (THMemoryFile*)self; \ - \ - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); \ - THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); \ - \ - if (n == 0) \ - return 0; \ - \ - if(mfself->file.isBinary) \ - { \ - size_t nByte = sizeof(TYPE)*n; \ - THMemoryFile_grow(mfself, mfself->position+nByte); \ - memmove(mfself->storage->data+mfself->position, data, nByte); \ - mfself->position += nByte; \ - if(mfself->position > mfself->size) \ - { \ - mfself->size = mfself->position; \ - mfself->storage->data[mfself->size] = '\0'; \ - } \ - } \ - else \ - { \ - size_t i; \ - for(i = 0; i < n; i++) \ - { \ - ssize_t nByteWritten; \ - while (1) \ - { \ - ASCII_WRITE_ELEM; \ - if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) ) \ - 
{ \ - mfself->position += nByteWritten; \ - break; \ - } \ - THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2); \ - } \ - if(mfself->file.isAutoSpacing) \ - { \ - if(i < n-1) \ - { \ - THMemoryFile_grow(mfself, mfself->position+1); \ - sprintf(mfself->storage->data+mfself->position, " "); \ - mfself->position++; \ - } \ - if(i == n-1) \ - { \ - THMemoryFile_grow(mfself, mfself->position+1); \ - sprintf(mfself->storage->data+mfself->position, "\n"); \ - mfself->position++; \ - } \ - } \ - } \ - if(mfself->position > mfself->size) \ - { \ - mfself->size = mfself->position; \ - mfself->storage->data[mfself->size] = '\0'; \ - } \ - } \ - \ - return n; \ - } - - -void THMemoryFile_longSize(THFile *self, int size) -{ - THMemoryFile *dfself = (THMemoryFile*)(self); - THArgCheck(size == 0 || size == 4 || size == 8, 1, "Invalid long size specified"); - dfself->longSize = size; -} - -THCharStorage *THMemoryFile_storage(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - - THCharStorage_resize(mfself->storage, mfself->size+1); - - return mfself->storage; -} - -static void THMemoryFile_synchronize(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); -} - -static void THMemoryFile_seek(THFile *self, size_t position) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - THArgCheck(position >= 0, 2, "position must be positive"); - - if(position <= mfself->size) - mfself->position = position; - else - { - mfself->file.hasError = 1; - if(!mfself->file.isQuiet) - THError("unable to seek at position %zu", position); - } -} - -static void THMemoryFile_seekEnd(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - - mfself->position 
= mfself->size; -} - -static size_t THMemoryFile_position(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - return mfself->position; -} - -static void THMemoryFile_close(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - THCharStorage_free(mfself->storage); - mfself->storage = NULL; -} - -static void THMemoryFile_free(THFile *self) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - - if(mfself->storage) - THCharStorage_free(mfself->storage); - - THFree(mfself); -} - -/* READ_WRITE_METHODS(bool, Bool, */ -/* int value = 0; int ret = sscanf(mfself->storage->data+mfself->position, "%d%n", &value, &nByteRead); data[i] = (value ? 1 : 0), */ -/* int value = (data[i] ? 1 : 0); nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%d", value), */ -/* 1) */ - -READ_WRITE_METHODS(unsigned char, Byte, - size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position); \ - if(spacePtr) *spacePtr = spaceChar; \ - nByteRead = ret; \ - nread = ret; \ - i = n-1; \ - memmove(data, mfself->storage->data+mfself->position, nByteRead), - nByteWritten = (n < mfself->storage->size-mfself->position ? n : -1); \ - i = n-1; \ - if(nByteWritten > -1) - memmove(mfself->storage->data+mfself->position, data, nByteWritten), - 0) - -/* DEBUG: we should check if %n is count or not as a element (so ret might need to be ret-- on some systems) */ -/* Note that we do a trick for char */ -READ_WRITE_METHODS(char, Char, - size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position); \ - if(spacePtr) *spacePtr = spaceChar; \ - nByteRead = ret; \ - nread = ret; \ - i = n-1; \ - memmove(data, mfself->storage->data+mfself->position, nByteRead), - nByteWritten = (n < mfself->storage->size-mfself->position ? 
n : -1); \ - i = n-1; \ - if(nByteWritten > -1) - memmove(mfself->storage->data+mfself->position, data, nByteWritten), - 0) - -READ_WRITE_METHODS(short, Short, - int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%hd%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, - nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%hd", data[i]), - 1) - -READ_WRITE_METHODS(int, Int, - int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%d%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, - nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%d", data[i]), - 1) - -READ_WRITE_METHODS(float, Float, - int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%g%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, - nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.9g", data[i]), - 1) - -READ_WRITE_METHODS(THHalf, Half, - int nByteRead_; float buf; \ - int ret = sscanf(mfself->storage->data+mfself->position, "%g%n", &buf, &nByteRead_); \ - data[i] = TH_float2half(buf); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, - nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.9g", TH_half2float(data[i])), - 1) - -READ_WRITE_METHODS(double, Double, - int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%lg%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++, - nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%.17g", data[i]), - 1) - -int THDiskFile_isLittleEndianCPU(void); - -static size_t THMemoryFile_readLong(THFile *self, long *data, size_t n) -{ - THMemoryFile *mfself = (THMemoryFile*)self; 
- size_t nread = 0L; - - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); - - if (n == 0) - return 0; - - if(mfself->file.isBinary) - { - if(mfself->longSize == 0 || mfself->longSize == sizeof(long)) - { - size_t nByte = sizeof(long)*n; - size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position); - nread = nByteRemaining/sizeof(long); - memmove(data, mfself->storage->data+mfself->position, nread*sizeof(long)); - mfself->position += nread*sizeof(long); - } else if(mfself->longSize == 4) - { - size_t nByte = 4*n; - size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position); - int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); - nread = nByteRemaining/4; - size_t i; - for(i = 0; i < nread; i++) - data[i] = storage[i]; - mfself->position += nread*4; - } - else /* if(mfself->longSize == 8) */ - { - int big_endian = !THDiskFile_isLittleEndianCPU(); - size_t nByte = 8*n; - int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); - size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? 
nByte : mfself->size-mfself->position); - nread = nByteRemaining/8; - size_t i; - for(i = 0; i < nread; i++) - data[i] = storage[2*i + big_endian]; - mfself->position += nread*8; - } - } - else - { - size_t i; - for(i = 0; i < n; i++) - { - size_t nByteRead = 0; - char spaceChar = 0; - char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar); - int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, "%ld%n", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++; - if(ret == EOF) - { - while(mfself->storage->data[mfself->position]) - mfself->position++; - } - else - mfself->position += nByteRead; - if(spacePtr) - *spacePtr = spaceChar; - } - if(mfself->file.isAutoSpacing && (n > 0)) - { - if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\n') ) - mfself->position++; - } - } - - if(nread != n) - { - mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/ - if(!mfself->file.isQuiet) - THError("read error: read %d blocks instead of %d", nread, n); - } - - return nread; -} - -static size_t THMemoryFile_writeLong(THFile *self, long *data, size_t n) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); - - if (n == 0) - return 0; - - if(mfself->file.isBinary) - { - if(mfself->longSize == 0 || mfself->longSize == sizeof(long)) - { - size_t nByte = sizeof(long)*n; - THMemoryFile_grow(mfself, mfself->position+nByte); - memmove(mfself->storage->data+mfself->position, data, nByte); - mfself->position += nByte; - } else if(mfself->longSize == 4) - { - size_t nByte = 4*n; - THMemoryFile_grow(mfself, mfself->position+nByte); - int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); - size_t i; - for(i = 0; i < n; i++) - storage[i] = data[i]; - mfself->position += nByte; - } - else /* if(mfself->longSize == 8) */ - { - int big_endian = !THDiskFile_isLittleEndianCPU(); - size_t nByte = 8*n; - THMemoryFile_grow(mfself, mfself->position+nByte); - int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position); - size_t i; - for(i = 0; i < n; i++) - { - storage[2*i + !big_endian] = 0; - storage[2*i + big_endian] = data[i]; - } - mfself->position += nByte; - } - if(mfself->position > mfself->size) - { - mfself->size = mfself->position; - mfself->storage->data[mfself->size] = '\0'; - } - } - else - { - size_t i; - for(i = 0; i < n; i++) - { - ssize_t nByteWritten; - while (1) - { - nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, "%ld", data[i]); - if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) ) - { - mfself->position += nByteWritten; - break; - } - THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2); - } - 
if(mfself->file.isAutoSpacing) - { - if(i < n-1) - { - THMemoryFile_grow(mfself, mfself->position+1); - sprintf(mfself->storage->data+mfself->position, " "); - mfself->position++; - } - if(i == n-1) - { - THMemoryFile_grow(mfself, mfself->position+1); - sprintf(mfself->storage->data+mfself->position, "\n"); - mfself->position++; - } - } - } - if(mfself->position > mfself->size) - { - mfself->size = mfself->position; - mfself->storage->data[mfself->size] = '\0'; - } - } - - return n; -} - -static char* THMemoryFile_cloneString(const char *str, ptrdiff_t size) -{ - char *cstr = THAlloc(size); - memcpy(cstr, str, size); - return cstr; -} - -static size_t THMemoryFile_readString(THFile *self, const char *format, char **str_) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - THArgCheck(mfself->file.isReadable, 1, "attempt to read in a write-only file"); - THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, "format must be '*a' or '*l'"); - - if(mfself->position == mfself->size) /* eof ? */ - { - mfself->file.hasError = 1; - if(!mfself->file.isQuiet) - THError("read error: read 0 blocks instead of 1"); - - *str_ = NULL; - return 0; - } - - if(format[1] == 'a') - { - size_t str_size = mfself->size-mfself->position; - - *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size); - mfself->position = mfself->size; - - return str_size; - } - else - { - char *p = mfself->storage->data+mfself->position; - int eolFound = 0; - size_t posEol; - size_t i; - for(i = 0; i < mfself->size-mfself->position; i++) - { - if(p[i] == '\n') - { - posEol = i; - eolFound = 1; - break; - } - } - - if(eolFound) - { - *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, posEol); - mfself->position += posEol+1; - return posEol; - } - else /* well, we read all! 
*/ - { - size_t str_size = mfself->size-mfself->position; - - *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size); - mfself->position = mfself->size; - - return str_size; - } - } - - *str_ = NULL; - return 0; -} - -static size_t THMemoryFile_writeString(THFile *self, const char *str, size_t size) -{ - THMemoryFile *mfself = (THMemoryFile*)self; - - THArgCheck(mfself->storage != NULL, 1, "attempt to use a closed file"); - THArgCheck(mfself->file.isWritable, 1, "attempt to write in a read-only file"); - - THMemoryFile_grow(mfself, mfself->position+size); - memmove(mfself->storage->data+mfself->position, str, size); - mfself->position += size; - if(mfself->position > mfself->size) - { - mfself->size = mfself->position; - mfself->storage->data[mfself->size] = '\0'; - } - - return size; -} - -THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode) -{ - static struct THFileVTable vtable = { - THMemoryFile_isOpened, - - THMemoryFile_readByte, - THMemoryFile_readChar, - THMemoryFile_readShort, - THMemoryFile_readInt, - THMemoryFile_readLong, - THMemoryFile_readFloat, - THMemoryFile_readDouble, - THMemoryFile_readHalf, - THMemoryFile_readString, - - THMemoryFile_writeByte, - THMemoryFile_writeChar, - THMemoryFile_writeShort, - THMemoryFile_writeInt, - THMemoryFile_writeLong, - THMemoryFile_writeFloat, - THMemoryFile_writeDouble, - THMemoryFile_writeHalf, - THMemoryFile_writeString, - - THMemoryFile_synchronize, - THMemoryFile_seek, - THMemoryFile_seekEnd, - THMemoryFile_position, - THMemoryFile_close, - THMemoryFile_free - }; - - THMemoryFile *mfself; - int isReadable; - int isWritable; - - if(storage) - { - THArgCheck(storage->data[storage->size-1] == '\0', 1, "provided CharStorage must be terminated by 0"); - THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, "file mode should be 'r','w' or 'rw'"); - THCharStorage_retain(storage); - } - else - { - THArgCheck(THMemoryFile_mode(mode, &isReadable, 
&isWritable), 2, "file mode should be 'r','w' or 'rw'"); - storage = THCharStorage_newWithSize(1); - storage->data[0] = '\0'; - } - - mfself = THAlloc(sizeof(THMemoryFile)); - - mfself->storage = storage; - mfself->size = (storage ? storage->size-1 : 0); - mfself->position = 0; - mfself->longSize = 0; - - mfself->file.vtable = &vtable; - mfself->file.isQuiet = 0; - mfself->file.isReadable = isReadable; - mfself->file.isWritable = isWritable; - mfself->file.isBinary = 0; - mfself->file.isAutoSpacing = 1; - mfself->file.hasError = 0; - - return (THFile*)mfself; -} - -THFile *THMemoryFile_new(const char *mode) -{ - return THMemoryFile_newWithStorage(NULL, mode); -} diff --git a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.h b/contrib/lua-torch/torch7/lib/TH/THMemoryFile.h deleted file mode 100644 index b54cdcc2f..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THMemoryFile.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef TH_MEMORY_FILE_INC -#define TH_MEMORY_FILE_INC - -#include "THFile.h" -#include "THStorage.h" - -TH_API THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode); -TH_API THFile *THMemoryFile_new(const char *mode); - -TH_API THCharStorage *THMemoryFile_storage(THFile *self); -TH_API void THMemoryFile_longSize(THFile *self, int size); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THRandom.c b/contrib/lua-torch/torch7/lib/TH/THRandom.c deleted file mode 100644 index 86d721e7b..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THRandom.c +++ /dev/null @@ -1,272 +0,0 @@ -#include "THGeneral.h" -#include "THRandom.h" - -/* Code for the Mersenne Twister random generator.... 
*/ -#define n _MERSENNE_STATE_N -#define m _MERSENNE_STATE_M - -/* Creates (unseeded) new generator*/ -static THGenerator* THGenerator_newUnseeded(void) -{ - THGenerator *self = THAlloc(sizeof(THGenerator)); - memset(self, 0, sizeof(THGenerator)); - self->left = 1; - self->seeded = 0; - self->normal_is_valid = 0; - return self; -} - -/* Creates new generator and makes sure it is seeded*/ -THGenerator* THGenerator_new(void) -{ - THGenerator *self = THGenerator_newUnseeded(); - THRandom_seed(self); - return self; -} - -THGenerator* THGenerator_copy(THGenerator *self, THGenerator *from) -{ - memcpy(self, from, sizeof(THGenerator)); - return self; -} - -void THGenerator_free(THGenerator *self) -{ - THFree(self); -} - -int THGenerator_isValid(THGenerator *_generator) -{ - if ((_generator->seeded == 1) && - (_generator->left > 0 && _generator->left <= n) && (_generator->next <= n)) - return 1; - - return 0; -} - -#ifndef _WIN32 -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> - -static unsigned long readURandomLong() -{ - int randDev = open("/dev/urandom", O_RDONLY); - unsigned long randValue; - if (randDev < 0) { - THError("Unable to open /dev/urandom"); - } - ssize_t readBytes = read(randDev, &randValue, sizeof(randValue)); - if (readBytes < sizeof(randValue)) { - THError("Unable to read from /dev/urandom"); - } - close(randDev); - return randValue; -} -#endif // _WIN32 - -unsigned long THRandom_seed(THGenerator *_generator) -{ -#ifdef _WIN32 - unsigned long s = (unsigned long)time(0); -#else - unsigned long s = readURandomLong(); -#endif - THRandom_manualSeed(_generator, s); - return s; -} - -/* The next 4 methods are taken from http:www.math.keio.ac.jpmatumotoemt.html - Here is the copyright: - Some minor modifications have been made to adapt to "my" C... */ - -/* - A C-program for MT19937, with initialization improved 2002/2/10. - Coded by Takuji Nishimura and Makoto Matsumoto. 
- This is a faster version by taking Shawn Cokus's optimization, - Matthe Bellew's simplification, Isaku Wada's double version. - - Before using, initialize the state by using init_genrand(seed) - or init_by_array(init_key, key_length). - - Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - Any feedback is very welcome. - http://www.math.keio.ac.jp/matumoto/emt.html - email: matumoto@math.keio.ac.jp -*/ - -/* Macros for the Mersenne Twister random generator... 
*/ -/* Period parameters */ -/* #define n 624 */ -/* #define m 397 */ -#define MATRIX_A 0x9908b0dfUL /* constant vector a */ -#define UMASK 0x80000000UL /* most significant w-r bits */ -#define LMASK 0x7fffffffUL /* least significant r bits */ -#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) -#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) -/*********************************************************** That's it. */ - -void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_) -{ - int j; - - /* This ensures reseeding resets all of the state (i.e. state for Gaussian numbers) */ - THGenerator *blank = THGenerator_newUnseeded(); - THGenerator_copy(_generator, blank); - THGenerator_free(blank); - - _generator->the_initial_seed = the_seed_; - _generator->state[0] = _generator->the_initial_seed & 0xffffffffUL; - for(j = 1; j < n; j++) - { - _generator->state[j] = (1812433253UL * (_generator->state[j-1] ^ (_generator->state[j-1] >> 30)) + j); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, mSBs of the seed affect */ - /* only mSBs of the array state[]. 
*/ - /* 2002/01/09 modified by makoto matsumoto */ - _generator->state[j] &= 0xffffffffUL; /* for >32 bit machines */ - } - _generator->left = 1; - _generator->seeded = 1; -} - -unsigned long THRandom_initialSeed(THGenerator *_generator) -{ - return _generator->the_initial_seed; -} - -void THRandom_nextState(THGenerator *_generator) -{ - unsigned long *p = _generator->state; - int j; - - _generator->left = n; - _generator->next = 0; - - for(j = n-m+1; --j; p++) - *p = p[m] ^ TWIST(p[0], p[1]); - - for(j = m; --j; p++) - *p = p[m-n] ^ TWIST(p[0], p[1]); - - *p = p[m-n] ^ TWIST(p[0], _generator->state[0]); -} - -unsigned long THRandom_random(THGenerator *_generator) -{ - unsigned long y; - - if (--(_generator->left) == 0) - THRandom_nextState(_generator); - y = *(_generator->state + (_generator->next)++); - - /* Tempering */ - y ^= (y >> 11); - y ^= (y << 7) & 0x9d2c5680UL; - y ^= (y << 15) & 0xefc60000UL; - y ^= (y >> 18); - - return y; -} - -/* generates a random number on [0,1)-double-interval */ -static double __uniform__(THGenerator *_generator) -{ - /* divided by 2^32 */ - return (double)THRandom_random(_generator) * (1.0/4294967296.0); -} - -/********************************************************* - - Thanks *a lot* Takuji Nishimura and Makoto Matsumoto! - - Now my own code... - -*********************************************************/ - -double THRandom_uniform(THGenerator *_generator, double a, double b) -{ - return(__uniform__(_generator) * (b - a) + a); -} - -double THRandom_normal(THGenerator *_generator, double mean, double stdv) -{ - THArgCheck(stdv > 0, 2, "standard deviation must be strictly positive"); - - /* This is known as the Box-Muller method */ - if(!_generator->normal_is_valid) - { - _generator->normal_x = __uniform__(_generator); - _generator->normal_y = __uniform__(_generator); - _generator->normal_rho = sqrt(-2. 
* log(1.0-_generator->normal_y)); - _generator->normal_is_valid = 1; - } - else - _generator->normal_is_valid = 0; - - if(_generator->normal_is_valid) - return _generator->normal_rho*cos(2.*M_PI*_generator->normal_x)*stdv+mean; - else - return _generator->normal_rho*sin(2.*M_PI*_generator->normal_x)*stdv+mean; -} - -double THRandom_exponential(THGenerator *_generator, double lambda) -{ - return(-1. / lambda * log(1-__uniform__(_generator))); -} - -double THRandom_cauchy(THGenerator *_generator, double median, double sigma) -{ - return(median + sigma * tan(M_PI*(__uniform__(_generator)-0.5))); -} - -/* Faut etre malade pour utiliser ca. - M'enfin. */ -double THRandom_logNormal(THGenerator *_generator, double mean, double stdv) -{ - THArgCheck(stdv > 0, 2, "standard deviation must be strictly positive"); - return(exp(THRandom_normal(_generator, mean, stdv))); -} - -int THRandom_geometric(THGenerator *_generator, double p) -{ - THArgCheck(p > 0 && p < 1, 1, "must be > 0 and < 1"); - return((int)(log(1-__uniform__(_generator)) / log(p)) + 1); -} - -int THRandom_bernoulli(THGenerator *_generator, double p) -{ - THArgCheck(p >= 0 && p <= 1, 1, "must be >= 0 and <= 1"); - return(__uniform__(_generator) <= p); -} diff --git a/contrib/lua-torch/torch7/lib/TH/THRandom.h b/contrib/lua-torch/torch7/lib/TH/THRandom.h deleted file mode 100644 index 28a14c0d7..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THRandom.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef TH_RANDOM_INC -#define TH_RANDOM_INC - -#include "THGeneral.h" - -#define _MERSENNE_STATE_N 624 -#define _MERSENNE_STATE_M 397 -/* A THGenerator contains all the state required for a single random number stream */ -typedef struct THGenerator { - /* The initial seed. 
*/ - unsigned long the_initial_seed; - int left; /* = 1; */ - int seeded; /* = 0; */ - unsigned long next; - unsigned long state[_MERSENNE_STATE_N]; /* the array for the state vector */ - /********************************/ - - /* For normal distribution */ - double normal_x; - double normal_y; - double normal_rho; - int normal_is_valid; /* = 0; */ -} THGenerator; - -#define torch_Generator "torch.Generator" - -/* Manipulate THGenerator objects */ -TH_API THGenerator * THGenerator_new(void); -TH_API THGenerator * THGenerator_copy(THGenerator *self, THGenerator *from); -TH_API void THGenerator_free(THGenerator *gen); - -/* Checks if given generator is valid */ -TH_API int THGenerator_isValid(THGenerator *_generator); - -/* Initializes the random number generator from /dev/urandom (or on Windows -platforms with the current time (granularity: seconds)) and returns the seed. */ -TH_API unsigned long THRandom_seed(THGenerator *_generator); - -/* Initializes the random number generator with the given long "the_seed_". */ -TH_API void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_); - -/* Returns the starting seed used. */ -TH_API unsigned long THRandom_initialSeed(THGenerator *_generator); - -/* Generates a uniform 32 bits integer. */ -TH_API unsigned long THRandom_random(THGenerator *_generator); - -/* Generates a uniform random number on [0,1[. */ -TH_API double THRandom_uniform(THGenerator *_generator, double a, double b); - -/** Generates a random number from a normal distribution. - (With mean #mean# and standard deviation #stdv >= 0#). -*/ -TH_API double THRandom_normal(THGenerator *_generator, double mean, double stdv); - -/** Generates a random number from an exponential distribution. - The density is $p(x) = lambda * exp(-lambda * x)$, where - lambda is a positive number. -*/ -TH_API double THRandom_exponential(THGenerator *_generator, double lambda); - -/** Returns a random number from a Cauchy distribution. 
- The Cauchy density is $p(x) = sigma/(pi*(sigma^2 + (x-median)^2))$ -*/ -TH_API double THRandom_cauchy(THGenerator *_generator, double median, double sigma); - -/** Generates a random number from a log-normal distribution. - (#mean > 0# is the mean of the log-normal distribution - and #stdv# is its standard deviation). -*/ -TH_API double THRandom_logNormal(THGenerator *_generator, double mean, double stdv); - -/** Generates a random number from a geometric distribution. - It returns an integer #i#, where $p(i) = (1-p) * p^(i-1)$. - p must satisfy $0 < p < 1$. -*/ -TH_API int THRandom_geometric(THGenerator *_generator, double p); - -/* Returns true with probability $p$ and false with probability $1-p$ (p > 0). */ -TH_API int THRandom_bernoulli(THGenerator *_generator, double p); -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THSize.c b/contrib/lua-torch/torch7/lib/TH/THSize.c deleted file mode 100644 index ccf1f61dd..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THSize.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "THSize.h" - -int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB) { - int d; - if (dimsA != dimsB) - return 0; - for(d = 0; d < dimsA; ++d) - { - if(sizeA[d] != sizeB[d]) - return 0; - } - return 1; -} - -ptrdiff_t THSize_nElement(long dims, long *size) { - if(dims == 0) - return 0; - else - { - ptrdiff_t nElement = 1; - int d; - for(d = 0; d < dims; d++) - nElement *= size[d]; - return nElement; - } -} diff --git a/contrib/lua-torch/torch7/lib/TH/THSize.h b/contrib/lua-torch/torch7/lib/TH/THSize.h deleted file mode 100644 index 3d39696f6..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THSize.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef TH_SIZE_INC -#define TH_SIZE_INC - -#include "THGeneral.h" -#include <stddef.h> - -// THTensor functions that would work on a THSize if we had such a class in C++, -// i.e. THTensor functions that depend only on the shape of the tensor, not the type. 
- -TH_API int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB); -TH_API ptrdiff_t THSize_nElement(long dims, long *size); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THStorage.c b/contrib/lua-torch/torch7/lib/TH/THStorage.c deleted file mode 100644 index f6b63f4a8..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THStorage.c +++ /dev/null @@ -1,153 +0,0 @@ -#include "THAtomic.h" -#include "THStorage.h" - -#include "generic/THStorage.c" -#include "THGenerateAllTypes.h" - -#include "generic/THStorage.c" -#include "THGenerateHalfType.h" - -#include "generic/THStorageCopy.c" -#include "THGenerateAllTypes.h" - -#include "generic/THStorageCopy.c" -#include "THGenerateHalfType.h" - - -THDescBuff THLongStorage_sizeDesc(const THLongStorage *size) { - return _THSizeDesc(size->data, size->size); -} - -THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement) -{ - ptrdiff_t total_size = (size->size > 0 ? 1 : 0); - ptrdiff_t dim_infer = -1; - ptrdiff_t i; - for (i = 0; i < size->size; i++) { - if (size->data[i] == -1) { - THArgCheck(dim_infer == -1, 1, "only one dimension can be inferred"); - dim_infer = i; - } else { - total_size *= size->data[i]; - } - } - if (dim_infer != -1) { - THDescBuff buf = THLongStorage_sizeDesc(size); - THArgCheck(total_size > 0 && nElement % total_size == 0, 2, - "size '%s' is invalid for input of with %td elements", buf.str, nElement); - } else { - THDescBuff buf = THLongStorage_sizeDesc(size); - THArgCheck(nElement == total_size, 2, - "size '%s' is invalid for input of with %td elements", buf.str, nElement); - } - THLongStorage* copy = THLongStorage_newWithSize(size->size); - THLongStorage_copy(copy, size); - if (dim_infer != -1) { - copy->data[dim_infer] = nElement / total_size; - } - return copy; -} - -int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA, long *sizesB, long dimsB, - char *error_buffer, int buffer_len) { - THArgCheck(sizesA != NULL, 1, 
"sizesA must not be null"); - THArgCheck(sizesB != NULL, 2, "sizesB must not be null"); - THArgCheck(dimsA, 1, "Can't expand empty tensor a"); - THArgCheck(dimsB, 1, "Can't expand empty tensor b"); - ptrdiff_t ndim = dimsA > dimsB ? dimsA : dimsB; - - long *expandedSizes = THAlloc(sizeof(long)*ndim); - - for (long i = ndim - 1; i >= 0; --i) { - long offset = ndim - 1 - i; - long dimA = dimsA - 1 - offset; - long dimB = dimsB - 1 - offset; - long sizeA = (dimA >= 0) ? sizesA[dimA] : 1; - long sizeB = (dimB >= 0) ? sizesB[dimB] : 1; - if (sizeA == sizeB || sizeA == 1 || sizeB == 1) { - expandedSizes[i] = THMax(sizeA, sizeB); - } else { - THFree(expandedSizes); - snprintf(error_buffer, buffer_len, "The size of tensor a (%ld) must match the size of tensor b (%ld) at " - "non-singleton dimension %ld.", sizeA, sizeB, i); - return -1; - } - } - THLongStorage_resize(output, ndim); - memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim); - THFree(expandedSizes); - return 0; -} - -int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims, - char *error_buffer, int buffer_len) { - THArgCheck(n > 0, 2, "n must be greater than 0"); - THArgCheck(sizes != NULL, 1, "sizes must not be null"); - THArgCheck(dims != NULL, 1, "dims must not be null"); - - ptrdiff_t ndim = 0; - for (int j = 0; j < n; ++j) { - THArgCheck(sizes[ j ] != NULL, 1, "size %d must not be null", j); - THArgCheck(dims[ j ], 1, "Can't expand empty tensor %d", j); - ndim = dims[ j ] > ndim ? dims[ j ] : ndim; - } - - long *expandedSizes = THAlloc(sizeof(long)*ndim); - - for (long i = ndim - 1; i >= 0; --i) { - expandedSizes[ i ] = 1; - long offset = ndim - 1 - i; - for (int j = 0; j < n; ++j) { - long dim = dims[ j ] - 1 - offset; - long size = (dim >= 0) ? 
sizes[ j ][ dim ] : 1; - if (size == expandedSizes[ i ] || size == 1 || expandedSizes[ i ] == 1) { - expandedSizes[ i ] = THMax(expandedSizes[ i ], size); - } else { - THFree(expandedSizes); - snprintf(error_buffer, buffer_len, "The size of tensor %i (%ld) must match the expanded size" - "of tensor (%ld) at non-singleton dimension %ld.", j, size, expandedSizes[ i ], i); - return -1; - } - } - } - THLongStorage_resize(output, ndim); - memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim); - THFree(expandedSizes); - return 0; -} - -int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim, - THLongStorage *sizes, long **expandedSizes, long **expandedStrides, - char *error_buffer, int buffer_len) { - ptrdiff_t ndim = THLongStorage_size(sizes); - - long *expandedSizesCalc = THAlloc(sizeof(long)*ndim); - long *expandedStridesCalc = THAlloc(sizeof(long)*ndim); - - // create a new geometry for the tensors - for (long i = ndim - 1; i >= 0; --i) { - long offset = ndim - 1 - i; - long dim = tensorDim - 1 - offset; - long size = (dim >= 0) ? tensorSizes[dim] : 1; - long stride = (dim >= 0) ? 
- tensorStrides[dim] : expandedSizesCalc[i + 1] * expandedStridesCalc[i+1]; - long targetSize = THLongStorage_data(sizes)[i]; - if (size != targetSize) { - if (size == 1) { - size = targetSize; - stride = 0; - } else { - THFree(expandedSizesCalc); - THFree(expandedStridesCalc); - snprintf(error_buffer, buffer_len, "The expanded size of the tensor (%ld) must match the existing size (%ld) at " - "non-singleton dimension %ld.", targetSize, size, i); - return -1; - } - } - expandedSizesCalc[i] = size; - expandedStridesCalc[i] = stride; - } - *expandedSizes = expandedSizesCalc; - *expandedStrides = expandedStridesCalc; - return 0; -} diff --git a/contrib/lua-torch/torch7/lib/TH/THStorage.h b/contrib/lua-torch/torch7/lib/TH/THStorage.h deleted file mode 100644 index fb7946bd9..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THStorage.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef TH_STORAGE_INC -#define TH_STORAGE_INC - -#include "THGeneral.h" -#include "THAllocator.h" - -#define THStorage TH_CONCAT_3(TH,Real,Storage) -#define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME) - -/* fast access methods */ -#define TH_STORAGE_GET(storage, idx) ((storage)->data[(idx)]) -#define TH_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value)) - -#include "generic/THStorage.h" -#include "THGenerateAllTypes.h" - -#include "generic/THStorage.h" -#include "THGenerateHalfType.h" - -#include "generic/THStorageCopy.h" -#include "THGenerateAllTypes.h" - -#include "generic/THStorageCopy.h" -#include "THGenerateHalfType.h" - -TH_API THDescBuff THLongStorage_sizeDesc(const THLongStorage *size); -TH_API THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement); - -// Given the sizes of {2,N} tensors, write out the size when the tensors are expanded together. 
-TH_API int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA, - long *sizesB, long dimsB, char *error_buffer, int buffer_len); -TH_API int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims, - char *error_buffer, int buffer_len); - -TH_API int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim, - THLongStorage *sizes, long **expandedSizes, long **expandedStrides, - char *error_buffer, int buffer_len); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THTensor.c b/contrib/lua-torch/torch7/lib/TH/THTensor.c deleted file mode 100644 index 115e396a1..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THTensor.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "THAtomic.h" -#include "THTensor.h" -#include "THVector.h" -#include "generic/simd/simd.h" - -#include "THBlas.h" -#include "THLapack.h" -#include "THRandom.h" -#include "THTensorDimApply.h" -#include "THMath.h" - -#include "generic/THTensor.c" -#include "THGenerateAllTypes.h" - -#include "generic/THTensor.c" -#include "THGenerateHalfType.h" - -#include "generic/THTensorCopy.c" -#include "THGenerateAllTypes.h" - -#include "generic/THTensorCopy.c" -#include "THGenerateHalfType.h" - -#include "generic/THTensorRandom.c" -#include "THGenerateAllTypes.h" - -#include "generic/THTensorMath.c" -#include "THGenerateAllTypes.h" - -#include "generic/THTensorConv.c" -#include "THGenerateAllTypes.h" - -#include "generic/THTensorLapack.c" -#include "THGenerateFloatTypes.h" diff --git a/contrib/lua-torch/torch7/lib/TH/THTensor.h b/contrib/lua-torch/torch7/lib/TH/THTensor.h deleted file mode 100644 index d2a1c57e8..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THTensor.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef TH_TENSOR_INC -#define TH_TENSOR_INC - -#include "THStorage.h" -#include "THTensorApply.h" - -#define THTensor TH_CONCAT_3(TH,Real,Tensor) -#define THTensor_(NAME) TH_CONCAT_4(TH,Real,Tensor_,NAME) - -/* basics */ -#include 
"generic/THTensor.h" -#include "THGenerateAllTypes.h" - -#include "generic/THTensor.h" -#include "THGenerateHalfType.h" - -#include "generic/THTensorCopy.h" -#include "THGenerateAllTypes.h" - -#include "generic/THTensorCopy.h" -#include "THGenerateHalfType.h" - -#include "THTensorMacros.h" - -/* random numbers */ -#include "THRandom.h" -#include "generic/THTensorRandom.h" -#include "THGenerateAllTypes.h" - -/* maths */ -#include "generic/THTensorMath.h" -#include "THGenerateAllTypes.h" - -/* convolutions */ -#include "generic/THTensorConv.h" -#include "THGenerateAllTypes.h" - -/* lapack support */ -#include "generic/THTensorLapack.h" -#include "THGenerateFloatTypes.h" - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THTensorApply.h b/contrib/lua-torch/torch7/lib/TH/THTensorApply.h deleted file mode 100644 index 7f48da47e..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THTensorApply.h +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef TH_TENSOR_APPLY_INC -#define TH_TENSOR_APPLY_INC - -/* - * The basic strategy for apply is as follows: - * - * 1. Starting with the outermost index, loop until we reach a dimension where the - * data is no longer contiguous, i.e. the stride at that dimension is not equal to - * the size of the tensor defined by the outer dimensions. Let's call this outer - * (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal - * to the entire Tensor. Let's call the inner tensor B. - * - * 2. We loop through the indices in B, starting at its outermost dimension. For - * example, if B is a 2x2 matrix, then we do: - * - * B[0][0] - * B[0][1] - * B[1][0] - * B[1][1] - * - * We set the offset into the underlying storage as (storageOffset + stride_B * index_B), - * i.e. basically we compute the offset into the storage as we would normally for a - * Tensor. 
But because we are guaranteed the subsequent data is contiguous in memory, we - * can simply loop for sizeof(A) iterations and perform the operation, without having to - * follow the order described by the strides of A. - * - * 3. As an optimization, we merge dimensions of A that are contiguous in memory. For - * example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two - * dimensions can be merged for the purposes of APPLY, reducing the number of nested - * loops. - */ - -#define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \ - TYPE *TENSOR##_data = NULL; \ - long *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \ - long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \ - int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \ - TENSOR##_n = (TENSOR->nDimension ? 1 : 0); \ - for(TENSOR##_i = 0; TENSOR##_i < TENSOR->nDimension; TENSOR##_i++) \ - TENSOR##_n *= TENSOR->size[TENSOR##_i]; \ -\ - if(TENSOR->nDimension == 0) \ - TH_TENSOR_APPLY_hasFinished = 1; \ - else \ - { \ - TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \ - TENSOR##_size = 1; \ - TENSOR##_stride = 1; \ - for(TENSOR##_i = TENSOR->nDimension-1; TENSOR##_i >= 0; TENSOR##_i--) { \ - if(TENSOR->size[TENSOR##_i] != 1) { \ - if(TENSOR->stride[TENSOR##_i] == TENSOR##_size && TENSOR##_i != DIM) \ - TENSOR##_size *= TENSOR->size[TENSOR##_i]; \ - else{ \ - TENSOR##_contiguous = 0; \ - break; \ - } \ - } \ - } \ - if (!TENSOR##_contiguous) { \ - /* Find the dimension of contiguous sections */ \ - TENSOR##_dim = 1; \ - for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; TENSOR##_i--) \ - { \ - if(TENSOR->stride[TENSOR##_i] != TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \ - TENSOR##_dim++; \ - } \ - /* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \ - 
TENSOR##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR##_dim)); \ - TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \ - TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \ - TH_TENSOR_dim_index = TENSOR##_dim-1; \ - TENSOR##_dimOffset = (DIM == TENSOR->nDimension-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \ - TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR->nDimension-1]; \ - TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \ - /* TENSOR##_counter tracks where we are in the storage. The offset into the */ \ - /* storage is given by storage_offset + (i * j), where i is the stride */ \ - /* vector and j is tensor_counter vector. This sets the starting position for the loop. */ \ - for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \ - TENSOR##_counter[TENSOR##_i] = 0; \ - } \ - for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \ - if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \ - TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_sizes[TH_TENSOR_dim_index]; \ - if (DIM != TENSOR->nDimension-1 && TENSOR##_i < DIM) \ - TENSOR##_dimOffset--; \ - } else { \ - --TH_TENSOR_dim_index; \ - TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i]; \ - TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR##_i]; \ - } \ - } \ - /* Size of the inner most section */ \ - TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \ - /* Stride of the inner most section */ \ - TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \ - } \ - } \ - TENSOR##_i = 0; - -#define __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \ - if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \ - { \ - if(TENSOR##_contiguous) \ - break; \ -\ - if(TENSOR##_dim == 1) \ - break; \ -\ - /* Reset pointer to beginning of loop */ \ - TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \ - for(TENSOR##_i = 
TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \ - { \ - TENSOR##_counter[TENSOR##_i]++; \ - /* Jump ahread by the stride of this dimension */ \ - TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \ -\ - if(TENSOR##_counter[TENSOR##_i] == TENSOR##_sizes[TENSOR##_i]) \ - { \ - if(TENSOR##_i == 0) \ - { \ - TH_TENSOR_APPLY_hasFinished = 1; \ - break; \ - } \ - else \ - { \ - /* Reset the pointer to the beginning of the chunk defined by this dimension */ \ - TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \ - TENSOR##_counter[TENSOR##_i] = 0; \ - } \ - } \ - else \ - break; \ - } \ - TENSOR##_i = 0; \ - } \ - -#define TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \ -{ \ - int TH_TENSOR_APPLY_hasFinished = 0; \ - long TH_TENSOR_dim_index = 0; \ - __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \ - __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \ - __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \ - \ - int elements_equal = 1; \ - if(TENSOR1##_n != TENSOR2##_n) { \ - elements_equal = 0; \ - } \ - else if(TENSOR1##_n != TENSOR3##_n) { \ - elements_equal = 0; \ - } \ - if (elements_equal == 0) { \ - THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ - THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ - THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \ - THError("inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same " \ - "number of elements, but got %d, %d and %d elements respectively", \ - #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, \ - TENSOR1##_n, TENSOR2##_n, TENSOR3##_n); \ - } \ - \ - while(!TH_TENSOR_APPLY_hasFinished) \ - { \ - /* Loop through the inner most region of the Tensor */ \ - for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += 
TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! */ \ - { \ - CODE \ - } \ - __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \ - __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \ - __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \ - } \ - if(TENSOR1##_counter != NULL) \ - THFree(TENSOR1##_counter); \ - if(TENSOR2##_counter != NULL) \ - THFree(TENSOR2##_counter); \ - if(TENSOR3##_counter != NULL) \ - THFree(TENSOR3##_counter); \ -} - -#define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \ - TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE) - -#define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \ -{ \ - int TH_TENSOR_APPLY_hasFinished = 0; \ - long TH_TENSOR_dim_index = 0; \ - __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \ - __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \ -\ - if(TENSOR1##_n != TENSOR2##_n) { \ - THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ - THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ - THError("inconsistent tensor size, expected %s %s and %s %s to have the same " \ - "number of elements, but got %d and %d elements respectively", \ - #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, TENSOR1##_n, TENSOR2##_n); \ - } \ - while(!TH_TENSOR_APPLY_hasFinished) \ - { \ - /* Loop through the inner most region of the Tensor */ \ - for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! 
*/ \ - { \ - CODE \ - } \ - __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \ - __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \ - } \ - if(TENSOR1##_counter != NULL) \ - THFree(TENSOR1##_counter); \ - if(TENSOR2##_counter != NULL) \ - THFree(TENSOR2##_counter); \ -} - -#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ - TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE) - -#define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \ -{ \ - int TH_TENSOR_APPLY_hasFinished = 0; \ - long TH_TENSOR_dim_index = 0; \ - __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \ -\ - while(!TH_TENSOR_APPLY_hasFinished) \ - { \ - /* Loop through the inner most region of the Tensor */ \ - for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \ - { \ - CODE \ - } \ - __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \ - } \ - THFree(TENSOR##_counter); \ -} - -#define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \ - TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE) - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h b/contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h deleted file mode 100644 index 6727e1f7f..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THTensorDimApply.h +++ /dev/null @@ -1,324 +0,0 @@ -#ifndef TH_TENSOR_DIM_APPLY_INC -#define TH_TENSOR_DIM_APPLY_INC - -#define TH_TENSOR_DIM_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIMENSION, CODE) \ -{ \ - TYPE1 *TENSOR1##_data = NULL; \ - long TENSOR1##_stride = 0, TENSOR1##_size = 0; \ - TYPE2 *TENSOR2##_data = NULL; \ - long TENSOR2##_stride = 0, TENSOR2##_size = 0; \ - TYPE3 *TENSOR3##_data = NULL; \ - long TENSOR3##_stride = 0, TENSOR3##_size = 0; \ - long *TH_TENSOR_DIM_APPLY_counter = NULL; \ - int TH_TENSOR_DIM_APPLY_hasFinished = 0; \ - int TH_TENSOR_DIM_APPLY_i; \ -\ - if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \ - THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, TENSOR1->nDimension); \ - 
int same_dims = 1; \ - if( TENSOR1->nDimension != TENSOR2->nDimension ) { \ - same_dims = 0; \ - } \ - if( TENSOR1->nDimension != TENSOR3->nDimension ) { \ - same_dims = 0; \ - } \ - if (same_dims == 0) { \ - THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ - THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ - THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \ - THError("inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same " \ - "number of dimensions", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str); \ - } \ - int shape_check_flag = 0; \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ - continue; \ - if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) \ - shape_check_flag = 1; \ - if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR3->size[TH_TENSOR_DIM_APPLY_i]) \ - shape_check_flag = 1; \ - } \ - \ - if (shape_check_flag == 1) { \ - THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ - THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ - THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \ - THError("Expected %s %s, %s %s and %s %s to have the same size in dimension %d", \ - #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, DIMENSION); \ - } \ -\ - TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ -\ - TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \ - TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \ - TENSOR1##_size = TENSOR1->size[DIMENSION]; \ -\ - TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \ - TENSOR2##_stride = 
(TENSOR2)->stride[DIMENSION]; \ - TENSOR2##_size = TENSOR2->size[DIMENSION]; \ -\ - TENSOR3##_data = (TENSOR3)->storage->data+(TENSOR3)->storageOffset; \ - TENSOR3##_stride = (TENSOR3)->stride[DIMENSION]; \ - TENSOR3##_size = TENSOR3->size[DIMENSION]; \ -\ - while(!TH_TENSOR_DIM_APPLY_hasFinished) \ - { \ - CODE \ -\ - if(TENSOR1->nDimension == 1) \ - break; \ - \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ - { \ - TH_TENSOR_DIM_APPLY_hasFinished = 1; \ - break; \ - } \ - continue; \ - } \ -\ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ - TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ - TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ - TENSOR3##_data += TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \ -\ - if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ - { \ - TH_TENSOR_DIM_APPLY_hasFinished = 1; \ - break; \ - } \ - else \ - { \ - TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ - TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ - TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ - } \ - } \ - else \ - break; \ - } \ - } \ - THFree(TH_TENSOR_DIM_APPLY_counter); \ -} - -/** - * Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor - * and one for the second. The two tensors must have the same shape, other than at the - * specified DIMENSION. This function makes it easy to store the output from reducing the - * TENSOR at index. 
For example, in the sum example described below, we could instead do: - * - * long i = 0; - * TYPE1 sum; - * - * for (i = 0; i < TENSOR1##_size; ++i) { - * sum += TENSOR1##_data[i * TENSOR1##_stride] - * } - * *TENSOR2##_data = (TYPE2) sum; - * - * In particular, we guarantee that the offset into TENSOR2 will be what you would get if - * you applied all of the index values used to generate the offset into TENSOR1. - */ -#define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \ -{ \ - TYPE1 *TENSOR1##_data = NULL; \ - long TENSOR1##_stride = 0, TENSOR1##_size = 0; \ - TYPE2 *TENSOR2##_data = NULL; \ - long TENSOR2##_stride = 0, TENSOR2##_size = 0; \ - long *TH_TENSOR_DIM_APPLY_counter = NULL; \ - int TH_TENSOR_DIM_APPLY_hasFinished = 0; \ - int TH_TENSOR_DIM_APPLY_i; \ -\ - if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \ - THError("invalid dimension %d (expected to be 0 <= dim < %d)", DIMENSION, TENSOR1->nDimension); \ - if( TENSOR1->nDimension != TENSOR2->nDimension ) { \ - THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ - THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ - THError("inconsistent tensor size, expected %s %s and %s %s to have the same " \ - "number of dimensions", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str); \ - } \ - int shape_check_flag = 0; \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ - continue; \ - if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) { \ - THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \ - THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \ - THError("Expected %s %s and %s %s to have the same size in dimension %d", \ - #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, DIMENSION); \ - } \ - } \ -\ - TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); 
\ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ -\ - TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \ - TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \ - TENSOR1##_size = TENSOR1->size[DIMENSION]; \ -\ - TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \ - TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \ - TENSOR2##_size = TENSOR2->size[DIMENSION]; \ -\ - while(!TH_TENSOR_DIM_APPLY_hasFinished) \ - { \ - CODE \ -\ - if(TENSOR1->nDimension == 1) \ - break; \ - \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ - { \ - TH_TENSOR_DIM_APPLY_hasFinished = 1; \ - break; \ - } \ - continue; \ - } \ -\ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ - TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ - TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ -\ - if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \ - { \ - TH_TENSOR_DIM_APPLY_hasFinished = 1; \ - break; \ - } \ - else \ - { \ - TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \ - TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ - } \ - } \ - else \ - break; \ - } \ - } \ - THFree(TH_TENSOR_DIM_APPLY_counter); \ -} - -/** - * The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored - * at all sets of dimension values other than DIMENSION, such that we can get all the values at those - * fixed indices for the various values at DIMENSION. 
- * - * Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the - * pointer into storage will be at: - * - * A[0][0] - * A[0][1] - * A[0][2] - * A[1][0] - * A[1][1] - * A[1][2] - * - * And at each point, we can access the data for each of the four elements of the Tensor via - * TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do: - * - * long i = 0; - * TYPE sum; - * for (i = 0; i < TENSOR##_size; i++) { - * sum += TENSOR##_data[i * TENSOR##_stride] - * } - * - * Note that we don't have to have DIMENSION be the last tensor. If we have DIMENSION=1, then we will hit the - * code (2x4) times, with pointer into the storage at: - * - * offset + - * stride_0 * 0 + stride_2 * 0 - * stride_0 * 1 + stride_2 * 0 - * stride_0 * 0 + stride_2 * 1 - * stride_0 * 1 + stride_2 * 1 - * stride_0 * 0 + stride_2 * 2 - * stride_0 * 1 + stride_2 * 2 - * stride_0 * 0 + stride_2 * 3 - * stride_0 * 1 + stride_2 * 3 - * - * So we can again sum over the values at DIMENSION with the other indices fixed. 
- */ -#define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \ -{ \ - TYPE *TENSOR##_data = NULL; \ - long TENSOR##_stride = 0, TENSOR##_size = 0; \ - long *TH_TENSOR_DIM_APPLY_counter = NULL; \ - int TH_TENSOR_DIM_APPLY_hasFinished = 0; \ - int TH_TENSOR_DIM_APPLY_i; \ -\ - if( (DIMENSION < 0) || (DIMENSION >= TENSOR->nDimension) ) \ - THError("invalid dimension"); \ -\ - TENSOR##_data = (TENSOR)->storage->data+(TENSOR)->storageOffset; \ - TENSOR##_stride = (TENSOR)->stride[DIMENSION]; \ - TENSOR##_size = TENSOR->size[DIMENSION]; \ - /* Counter stores the indices into the Tensor at any time */ \ - TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR->nDimension)); \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ -\ - while(!TH_TENSOR_DIM_APPLY_hasFinished) \ - { \ - CODE \ -\ - if(TENSOR->nDimension == 1) \ - break; \ - \ - for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \ - { \ - /* Check if the index is equal to DIMENSION. We don't need to update the */ \ - /* offset if this is the case, and can consider the next index. However, */ \ - /* in the case that the DIMENSION is the last index in the Tensor, then */ \ - /* we have parsed the entire tensor and can exit */ \ - if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \ - { \ - if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \ - { \ - TH_TENSOR_DIM_APPLY_hasFinished = 1; \ - break; \ - } \ - continue; \ - } \ -\ - /* Bump the counter at this index, update the pointer */ \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \ - TENSOR##_data += TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \ -\ - if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR->size[TH_TENSOR_DIM_APPLY_i]) \ - { \ - /* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. 
If this is the last dimension, exit */ \ - if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \ - { \ - TH_TENSOR_DIM_APPLY_hasFinished = 1; \ - break; \ - } \ - else \ - { \ - /* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \ - TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \ - TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \ - } \ - } \ - else \ - break; \ - } \ - } \ - THFree(TH_TENSOR_DIM_APPLY_counter); \ -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THTensorMacros.h b/contrib/lua-torch/torch7/lib/TH/THTensorMacros.h deleted file mode 100644 index 15b67665e..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THTensorMacros.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef TH_TENSOR_MACROS_INC -#define TH_TENSOR_MACROS_INC - -/* fast method to access to tensor data */ - -#define THTensor_fastGet1d(self, x0) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]]) - -#define THTensor_fastGet2d(self, x0, x1) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]]) - -#define THTensor_fastGet3d(self, x0, x1, x2) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]]) - -#define THTensor_fastGet4d(self, x0, x1, x2, x3) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]]) - -#define THTensor_fastSet1d(self, x0, value) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]] = value) - -#define THTensor_fastSet2d(self, x0, x1, value) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]] = value) - -#define THTensor_fastSet3d(self, x0, x1, x2, value) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]] 
= value) - -#define THTensor_fastSet4d(self, x0, x1, x2, x3, value) \ - (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]] = value) - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/THVector.c b/contrib/lua-torch/torch7/lib/TH/THVector.c deleted file mode 100644 index 441057884..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THVector.c +++ /dev/null @@ -1,30 +0,0 @@ -#include "THVector.h" - -#include "generic/simd/simd.h" - -#ifdef __NEON__ -#include "vector/NEON.c" -#endif - -#ifdef __PPC64__ -#include "vector/VSX.c" -#endif - -#if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) -#include "vector/SSE.c" -#endif - -#if defined(USE_AVX) -#include "vector/AVX.h" -#endif - -#if defined(USE_AVX2) -#include "vector/AVX2.h" -#endif - -#include "generic/THVectorDefault.c" -#include "THGenerateAllTypes.h" - -#include "generic/THVectorDispatch.c" -#include "THGenerateAllTypes.h" diff --git a/contrib/lua-torch/torch7/lib/TH/THVector.h b/contrib/lua-torch/torch7/lib/TH/THVector.h deleted file mode 100644 index e29917b93..000000000 --- a/contrib/lua-torch/torch7/lib/TH/THVector.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef TH_VECTOR_INC -#define TH_VECTOR_INC - -#include "THGeneral.h" - -#define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME) - -/* We are going to use dynamic dispatch, and want only to generate declarations - * of the vector functions */ -#include "generic/THVector.h" -#include "THGenerateAllTypes.h" - -#endif // TH_VECTOR_INC diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake deleted file mode 100644 index 2dcb2a24f..000000000 --- a/contrib/lua-torch/torch7/lib/TH/cmake/FindARM.cmake +++ /dev/null @@ -1,76 +0,0 @@ -# Check if the processor is an ARM and if Neon instruction are available on the machine where -# the project is compiled. 
- -IF(CMAKE_SYSTEM_NAME MATCHES "Linux") - EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) - - #neon instruction can be found on the majority part of modern ARM processor - STRING(REGEX REPLACE "^.*(neon).*$" "\\1" NEON_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "neon" "${NEON_THERE}" NEON_TRUE) - IF (NEON_TRUE) - set(NEON_FOUND true CACHE BOOL "NEON available on host") - ELSE (NEON_TRUE) - set(NEON_FOUND false CACHE BOOL "NEON available on host") - ENDIF (NEON_TRUE) - - # on ARMv8, neon is inherit and instead listed as 'asimd' in /proc/cpuinfo - STRING(REGEX REPLACE "^.*(asimd).*$" "\\1" ASIMD_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "asimd" "${ASIMD_THERE}" ASIMD_TRUE) - IF (ASIMD_TRUE) - set(ASIMD_FOUND true CACHE BOOL "ASIMD/NEON available on host") - ELSE (ASIMD_TRUE) - set(ASIMD_FOUND false CACHE BOOL "ASIMD/NEON available on host") - ENDIF (ASIMD_TRUE) - - #Find the processor type (for now OMAP3 or OMAP4) - STRING(REGEX REPLACE "^.*(OMAP3).*$" "\\1" OMAP3_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "OMAP3" "${OMAP3_THERE}" OMAP3_TRUE) - IF (OMAP3_TRUE) - set(CORTEXA8_FOUND true CACHE BOOL "OMAP3 available on host") - ELSE (OMAP3_TRUE) - set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 available on host") - ENDIF (OMAP3_TRUE) - - #Find the processor type (for now OMAP3 or OMAP4) - STRING(REGEX REPLACE "^.*(OMAP4).*$" "\\1" OMAP4_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "OMAP4" "${OMAP4_THERE}" OMAP4_TRUE) - IF (OMAP4_TRUE) - set(CORTEXA9_FOUND true CACHE BOOL "OMAP4 available on host") - ELSE (OMAP4_TRUE) - set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 available on host") - ENDIF (OMAP4_TRUE) - -ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") - EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE - CPUINFO) - - #neon instruction can be found on the majority part of modern ARM processor - STRING(REGEX REPLACE "^.*(neon).*$" "\\1" NEON_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "neon" "${NEON_THERE}" NEON_TRUE) - IF (NEON_TRUE) - set(NEON_FOUND 
true CACHE BOOL "NEON available on host") - ELSE (NEON_TRUE) - set(NEON_FOUND false CACHE BOOL "NEON available on host") - ENDIF (NEON_TRUE) - -ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") - # TODO - set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 not available on host") - set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 not available on host") - set(NEON_FOUND false CACHE BOOL "NEON not available on host") -ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") - set(CORTEXA8_FOUND false CACHE BOOL "OMAP3 not available on host") - set(CORTEXA9_FOUND false CACHE BOOL "OMAP4 not available on host") - set(NEON_FOUND false CACHE BOOL "NEON not available on host") -ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") - -if(NOT NEON_FOUND) - MESSAGE(STATUS "Could not find hardware support for NEON on this machine.") -endif(NOT NEON_FOUND) -if(NOT CORTEXA8_FOUND) - MESSAGE(STATUS "No OMAP3 processor on this machine.") -endif(NOT CORTEXA8_FOUND) -if(NOT CORTEXA9_FOUND) - MESSAGE(STATUS "No OMAP4 processor on this machine.") -endif(NOT CORTEXA9_FOUND) -mark_as_advanced(NEON_FOUND) diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake deleted file mode 100644 index 1f254d231..000000000 --- a/contrib/lua-torch/torch7/lib/TH/cmake/FindBLAS.cmake +++ /dev/null @@ -1,309 +0,0 @@ -# - Find BLAS library -# This module finds an installed fortran library that implements the BLAS -# linear-algebra interface (see http://www.netlib.org/blas/). -# The list of libraries searched for is taken -# from the autoconf macro file, acx_blas.m4 (distributed at -# http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). -# -# This module sets the following variables: -# BLAS_FOUND - set to true if a library implementing the BLAS interface is found. -# BLAS_INFO - name of the detected BLAS library. 
-# BLAS_F2C - set to true if following the f2c return convention -# BLAS_LIBRARIES - list of libraries to link against to use BLAS -# BLAS_INCLUDE_DIR - include directory - -# Do nothing is BLAS was found before -IF(NOT BLAS_FOUND) - -SET(BLAS_LIBRARIES) -SET(BLAS_INCLUDE_DIR) -SET(BLAS_INFO) -SET(BLAS_F2C) - -SET(WITH_BLAS "" CACHE STRING "Blas type [mkl/open/goto/acml/atlas/accelerate/veclib/generic]") - -# Old FindBlas -INCLUDE(CheckCSourceRuns) -INCLUDE(CheckFortranFunctionExists) - -MACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list) - # This macro checks for the existence of the combination of fortran libraries - # given by _list. If the combination is found, this macro checks (using the - # Check_Fortran_Function_Exists macro) whether can link against that library - # combination using the name of a routine given by _name using the linker - # flags given by _flags. If the combination of libraries is found and passes - # the link test, LIBRARIES is set to the list of complete library paths that - # have been found. Otherwise, LIBRARIES is set to NOTFOUND. - # N.B. _prefix is the prefix applied to the names of all cached variables that - # are generated internally and marked advanced by this macro. 
- - set(__list) - foreach(_elem ${_list}) - if(__list) - set(__list "${__list} - ${_elem}") - else(__list) - set(__list "${_elem}") - endif(__list) - endforeach(_elem) - message(STATUS "Checking for [${__list}]") - - set(_libraries_work TRUE) - set(${LIBRARIES}) - set(_combined_name) - foreach(_library ${_list}) - set(_combined_name ${_combined_name}_${_library}) - if(_libraries_work) - if ( WIN32 ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ENV LIB - PATHS ENV PATH ) - endif ( WIN32 ) - if ( APPLE ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 - ENV DYLD_LIBRARY_PATH ) - else ( APPLE ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 - ENV LD_LIBRARY_PATH ) - endif( APPLE ) - mark_as_advanced(${_prefix}_${_library}_LIBRARY) - set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) - MESSAGE(STATUS " Library ${_library}: ${${_prefix}_${_library}_LIBRARY}") - endif(_libraries_work) - endforeach(_library ${_list}) - if(_libraries_work) - # Test this combination of libraries. - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) - if (CMAKE_Fortran_COMPILER_WORKS) - check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS) - else (CMAKE_Fortran_COMPILER_WORKS) - check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) - endif (CMAKE_Fortran_COMPILER_WORKS) - set(CMAKE_REQUIRED_LIBRARIES) - mark_as_advanced(${_prefix}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) - endif(_libraries_work) - if(NOT _libraries_work) - set(${LIBRARIES} NOTFOUND) - endif(NOT _libraries_work) -endmacro(Check_Fortran_Libraries) - -# Intel MKL? 
-if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "mkl"))) - FIND_PACKAGE(MKL) - IF(MKL_FOUND) - SET(BLAS_INFO "mkl") - SET(BLAS_LIBRARIES ${MKL_LIBRARIES}) - SET(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR}) - SET(BLAS_VERSION ${MKL_VERSION}) - ENDIF(MKL_FOUND) -endif() - -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "openblas") - if(BLAS_LIBRARIES) - set(BLAS_INFO "open") - endif(BLAS_LIBRARIES) -endif() - -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "openblas;pthread") - if(BLAS_LIBRARIES) - set(BLAS_INFO "open") - endif(BLAS_LIBRARIES) -endif() - -if((NOT BLAS_LIBRARIES) AND (WIN32) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "libopenblas") - if(BLAS_LIBRARIES) - set(BLAS_INFO "open") - endif(BLAS_LIBRARIES) -endif() - -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "goto"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "goto2;gfortran") - if (BLAS_LIBRARIES) - set(BLAS_INFO "goto") - endif (BLAS_LIBRARIES) -endif() - -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "goto"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "goto2;gfortran;pthread") - if (BLAS_LIBRARIES) - set(BLAS_INFO "goto") - endif (BLAS_LIBRARIES) -endif() - -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "acml"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "acml;gfortran") - if (BLAS_LIBRARIES) - set(BLAS_INFO "acml") - endif (BLAS_LIBRARIES) -endif() - -# Apple BLAS library? 
-if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "accelerate"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "Accelerate") - if (BLAS_LIBRARIES) - set(BLAS_INFO "accelerate") - set(BLAS_IS_ACCELERATE 1) - endif (BLAS_LIBRARIES) -endif() - -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "veclib"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "vecLib") - if (BLAS_LIBRARIES) - set(BLAS_INFO "veclib") - endif (BLAS_LIBRARIES) -endif() - -# BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "atlas"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "ptf77blas;atlas;gfortran") - if (BLAS_LIBRARIES) - set(BLAS_INFO "atlas") - endif (BLAS_LIBRARIES) -endif() - -# Generic BLAS library? -if((NOT BLAS_LIBRARIES) - AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "generic"))) - check_fortran_libraries( - BLAS_LIBRARIES - BLAS - sgemm - "" - "blas") - if (BLAS_LIBRARIES) - check_fortran_libraries( - TMP_BLAS_LIBRARIES - TMP_BLAS - openblas_get_num_threads - "" - "blas") - if (TMP_BLAS_LIBRARIES) - set(BLAS_INFO "open") - else() - set(BLAS_INFO "generic") - endif() - endif (BLAS_LIBRARIES) -endif() - -# Determine if blas was compiled with the f2c conventions -IF (BLAS_LIBRARIES) - SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) - CHECK_C_SOURCE_RUNS(" -#include <stdlib.h> -#include <stdio.h> -float x[4] = { 1, 2, 3, 4 }; -float y[4] = { .1, .01, .001, .0001 }; -int four = 4; -int one = 1; -extern double sdot_(); -int main() { - int i; - double r = sdot_(&four, x, &one, y, &one); - exit((float)r != (float).1234); -}" BLAS_F2C_DOUBLE_WORKS ) - CHECK_C_SOURCE_RUNS(" -#include <stdlib.h> -#include <stdio.h> -float x[4] = { 1, 2, 3, 4 }; -float y[4] = { .1, .01, .001, .0001 }; -int four = 4; -int one = 1; -extern float sdot_(); -int main() { - int i; - double r = sdot_(&four, x, &one, y, &one); - 
exit((float)r != (float).1234); -}" BLAS_F2C_FLOAT_WORKS ) - IF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS) - MESSAGE(STATUS "This BLAS uses the F2C return conventions") - SET(BLAS_F2C TRUE) - ELSE (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS) - SET(BLAS_F2C FALSE) - ENDIF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS) -ENDIF(BLAS_LIBRARIES) - -# epilogue - -if(BLAS_LIBRARIES) - set(BLAS_FOUND TRUE) -else(BLAS_LIBRARIES) - set(BLAS_FOUND FALSE) -endif(BLAS_LIBRARIES) - -IF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED) - message(FATAL_ERROR "Cannot find a library with BLAS API. Please specify library location.") -ENDIF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED) -IF(NOT BLAS_FIND_QUIETLY) - IF(BLAS_FOUND) - MESSAGE(STATUS "Found a library with BLAS API (${BLAS_INFO}).") - ELSE(BLAS_FOUND) - MESSAGE(STATUS "Cannot find a library with BLAS API. Not using BLAS.") - ENDIF(BLAS_FOUND) -ENDIF(NOT BLAS_FIND_QUIETLY) - -# Do nothing is BLAS was found before -ENDIF(NOT BLAS_FOUND) diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake deleted file mode 100644 index 9eca0730f..000000000 --- a/contrib/lua-torch/torch7/lib/TH/cmake/FindLAPACK.cmake +++ /dev/null @@ -1,190 +0,0 @@ -# - Find LAPACK library -# This module finds an installed fortran library that implements the LAPACK -# linear-algebra interface (see http://www.netlib.org/lapack/). -# -# The approach follows that taken for the autoconf macro file, acx_lapack.m4 -# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). 
-# -# This module sets the following variables: -# LAPACK_FOUND - set to true if a library implementing the LAPACK interface is found -# LAPACK_LIBRARIES - list of libraries (using full path name) for LAPACK - -# Note: I do not think it is a good idea to mixup different BLAS/LAPACK versions -# Hence, this script wants to find a Lapack library matching your Blas library - -# Do nothing if LAPACK was found before -IF(NOT LAPACK_FOUND) - -SET(LAPACK_LIBRARIES) -SET(LAPACK_INFO) - -IF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - FIND_PACKAGE(BLAS) -ELSE(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - FIND_PACKAGE(BLAS REQUIRED) -ENDIF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - -# Old search lapack script -include(CheckFortranFunctionExists) - -macro(Check_Lapack_Libraries LIBRARIES _prefix _name _flags _list _blas) - # This macro checks for the existence of the combination of fortran libraries - # given by _list. If the combination is found, this macro checks (using the - # Check_Fortran_Function_Exists macro) whether can link against that library - # combination using the name of a routine given by _name using the linker - # flags given by _flags. If the combination of libraries is found and passes - # the link test, LIBRARIES is set to the list of complete library paths that - # have been found. Otherwise, LIBRARIES is set to FALSE. - # N.B. _prefix is the prefix applied to the names of all cached variables that - # are generated internally and marked advanced by this macro. 
- set(_libraries_work TRUE) - set(${LIBRARIES}) - set(_combined_name) - foreach(_library ${_list}) - set(_combined_name ${_combined_name}_${_library}) - if(_libraries_work) - if (WIN32) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} PATHS ENV LIB PATHS ENV PATH) - else (WIN32) - if(APPLE) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 - ENV DYLD_LIBRARY_PATH) - else(APPLE) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 - ENV LD_LIBRARY_PATH) - endif(APPLE) - endif(WIN32) - mark_as_advanced(${_prefix}_${_library}_LIBRARY) - set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) - endif(_libraries_work) - endforeach(_library ${_list}) - if(_libraries_work) - # Test this combination of libraries. - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas}) - if (CMAKE_Fortran_COMPILER_WORKS) - check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS) - else (CMAKE_Fortran_COMPILER_WORKS) - check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) - endif (CMAKE_Fortran_COMPILER_WORKS) - set(CMAKE_REQUIRED_LIBRARIES) - mark_as_advanced(${_prefix}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) - endif(_libraries_work) - if(NOT _libraries_work) - set(${LIBRARIES} FALSE) - endif(NOT _libraries_work) -endmacro(Check_Lapack_Libraries) - - -if(BLAS_FOUND) - - # Intel MKL - IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "mkl")) - IF(MKL_LAPACK_LIBRARIES) - SET(LAPACK_LIBRARIES ${MKL_LAPACK_LIBRARIES} ${MKL_LIBRARIES}) - ELSE(MKL_LAPACK_LIBRARIES) - SET(LAPACK_LIBRARIES ${MKL_LIBRARIES}) - ENDIF(MKL_LAPACK_LIBRARIES) - SET(LAPACK_INCLUDE_DIR ${MKL_INCLUDE_DIR}) - SET(LAPACK_INFO "mkl") - ENDIF() - - # OpenBlas - IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "open")) - 
SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) - check_function_exists("cheev_" OPEN_LAPACK_WORKS) - if(OPEN_LAPACK_WORKS) - SET(LAPACK_INFO "open") - else() - message(STATUS "It seems OpenBlas has not been compiled with Lapack support") - endif() - endif() - - # GotoBlas - IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "goto")) - SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) - check_function_exists("cheev_" GOTO_LAPACK_WORKS) - if(GOTO_LAPACK_WORKS) - SET(LAPACK_INFO "goto") - else() - message(STATUS "It seems GotoBlas has not been compiled with Lapack support") - endif() - endif() - - # ACML - IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "acml")) - SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) - check_function_exists("cheev_" ACML_LAPACK_WORKS) - if(ACML_LAPACK_WORKS) - SET(LAPACK_INFO "acml") - else() - message(STATUS "Strangely, this ACML library does not support Lapack?!") - endif() - endif() - - # Accelerate - IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "accelerate")) - SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) - check_function_exists("cheev_" ACCELERATE_LAPACK_WORKS) - if(ACCELERATE_LAPACK_WORKS) - SET(LAPACK_INFO "accelerate") - else() - message(STATUS "Strangely, this Accelerate library does not support Lapack?!") - endif() - endif() - - # vecLib - IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL "veclib")) - SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES}) - check_function_exists("cheev_" VECLIB_LAPACK_WORKS) - if(VECLIB_LAPACK_WORKS) - SET(LAPACK_INFO "veclib") - else() - message(STATUS "Strangely, this vecLib library does not support Lapack?!") - endif() - endif() - - # Generic LAPACK library? 
- IF((NOT LAPACK_INFO) AND ((BLAS_INFO STREQUAL "generic") OR (BLAS_INFO STREQUAL "open"))) - check_lapack_libraries( - LAPACK_LIBRARIES - LAPACK - cheev - "" - "lapack" - "${BLAS_LIBRARIES}" - ) - if(LAPACK_LIBRARIES) - SET(LAPACK_INFO "generic") - endif(LAPACK_LIBRARIES) - endif() - -else(BLAS_FOUND) - message(STATUS "LAPACK requires BLAS") -endif(BLAS_FOUND) - -if(LAPACK_INFO) - set(LAPACK_FOUND TRUE) -else(LAPACK_INFO) - set(LAPACK_FOUND FALSE) -endif(LAPACK_INFO) - -IF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED) - message(FATAL_ERROR "Cannot find a library with LAPACK API. Please specify library location.") -ENDIF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED) -IF(NOT LAPACK_FIND_QUIETLY) - IF(LAPACK_FOUND) - MESSAGE(STATUS "Found a library with LAPACK API. (${LAPACK_INFO})") - ELSE(LAPACK_FOUND) - MESSAGE(STATUS "Cannot find a library with LAPACK API. Not using LAPACK.") - ENDIF(LAPACK_FOUND) -ENDIF(NOT LAPACK_FIND_QUIETLY) - -# Do nothing if LAPACK was found before -ENDIF(NOT LAPACK_FOUND) diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake deleted file mode 100644 index 08b450985..000000000 --- a/contrib/lua-torch/torch7/lib/TH/cmake/FindMKL.cmake +++ /dev/null @@ -1,272 +0,0 @@ -# - Find INTEL MKL library -# -# This module finds the Intel Mkl libraries. -# -# This module sets the following variables: -# MKL_FOUND - set to true if a library implementing the CBLAS interface is found -# MKL_VERSION - best guess -# MKL_INCLUDE_DIR - path to include dir. -# MKL_LIBRARIES - list of libraries for base mkl -# MKL_LAPACK_LIBRARIES - list of libraries to add for lapack -# MKL_SCALAPACK_LIBRARIES - list of libraries to add for scalapack -# MKL_SOLVER_LIBRARIES - list of libraries to add for the solvers -# MKL_CDFT_LIBRARIES - list of libraries to add for the solvers - - -# Do nothing if MKL_FOUND was set before! 
-IF (NOT MKL_FOUND) - -SET(MKL_VERSION) -SET(MKL_INCLUDE_DIR) -SET(MKL_LIBRARIES) -SET(MKL_LAPACK_LIBRARIES) -SET(MKL_SCALAPACK_LIBRARIES) -SET(MKL_SOLVER_LIBRARIES) -SET(MKL_CDFT_LIBRARIES) - -# Includes -INCLUDE(CheckTypeSize) -INCLUDE(CheckFunctionExists) - -# Intel Compiler Suite -SET(INTEL_COMPILER_DIR CACHE STRING - "Root directory of the Intel Compiler Suite (contains ipp, mkl, etc.)") -SET(INTEL_MKL_DIR CACHE STRING - "Root directory of the Intel MKL (standalone)") -SET(INTEL_MKL_SEQUENTIAL OFF CACHE BOOL - "Force using the sequential (non threaded) libraries") - -# Checks -CHECK_TYPE_SIZE("void*" SIZE_OF_VOIDP) -IF ("${SIZE_OF_VOIDP}" EQUAL 8) - SET(mklvers "em64t") - SET(iccvers "intel64") - SET(mkl64s "_lp64") -ELSE ("${SIZE_OF_VOIDP}" EQUAL 8) - SET(mklvers "32") - SET(iccvers "ia32") - SET(mkl64s) -ENDIF ("${SIZE_OF_VOIDP}" EQUAL 8) -IF(CMAKE_COMPILER_IS_GNUCC) - SET(mklthreads "mkl_gnu_thread" "mkl_intel_thread") - SET(mklifaces "gf" "intel") - SET(mklrtls "iomp5") -ELSE(CMAKE_COMPILER_IS_GNUCC) - SET(mklthreads "mkl_intel_thread") - SET(mklifaces "intel") - SET(mklrtls "iomp5" "guide") - IF (MSVC) - SET(mklrtls "libiomp5md") - ENDIF (MSVC) -ENDIF (CMAKE_COMPILER_IS_GNUCC) - -# Kernel libraries dynamically loaded -SET(mklkerlibs "mc" "mc3" "nc" "p4n" "p4m" "p4m3" "p4p" "def") -SET(mklseq) - - - -# Paths -SET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}) -SET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH}) -IF (INTEL_COMPILER_DIR) - # TODO: diagnostic if dir does not exist - SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} - "${INTEL_COMPILER_DIR}/lib/${iccvers}") - IF (NOT INTEL_MKL_DIR) - SET(INTEL_MKL_DIR "${INTEL_COMPILER_DIR}/mkl") - ENDIF (NOT INTEL_MKL_DIR) -ENDIF (INTEL_COMPILER_DIR) -IF (INTEL_MKL_DIR) - # TODO: diagnostic if dir does not exist - SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} - "${INTEL_MKL_DIR}/include") - SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} - "${INTEL_MKL_DIR}/lib/${mklvers}") - IF (MSVC) - SET(CMAKE_LIBRARY_PATH 
${CMAKE_LIBRARY_PATH} - "${INTEL_MKL_DIR}/lib/${iccvers}") - ENDIF (MSVC) -ENDIF (INTEL_MKL_DIR) - -# Try linking multiple libs -MACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags) - # This macro checks for the existence of the combination of libraries given by _list. - # If the combination is found, this macro whether we can link against that library - # combination using the name of a routine given by _name using the linker - # flags given by _flags. If the combination of libraries is found and passes - # the link test, LIBRARIES is set to the list of complete library paths that - # have been found. Otherwise, LIBRARIES is set to FALSE. - # N.B. _prefix is the prefix applied to the names of all cached variables that - # are generated internally and marked advanced by this macro. - SET(_prefix "${LIBRARIES}") - # start checking - SET(_libraries_work TRUE) - SET(${LIBRARIES}) - SET(_combined_name) - SET(_paths) - set(__list) - foreach(_elem ${_list}) - if(__list) - set(__list "${__list} - ${_elem}") - else(__list) - set(__list "${_elem}") - endif(__list) - endforeach(_elem) - message(STATUS "Checking for [${__list}]") - FOREACH(_library ${_list}) - SET(_combined_name ${_combined_name}_${_library}) - IF(_libraries_work) - FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library}) - MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY) - SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - SET(_libraries_work ${${_prefix}_${_library}_LIBRARY}) - IF(${_prefix}_${_library}_LIBRARY) - MESSAGE(STATUS " Library ${_library}: ${${_prefix}_${_library}_LIBRARY}") - ELSE(${_prefix}_${_library}_LIBRARY) - MESSAGE(STATUS " Library ${_library}: not found") - ENDIF(${_prefix}_${_library}_LIBRARY) - ENDIF(_libraries_work) - ENDFOREACH(_library ${_list}) - # Test this combination of libraries. 
- IF(_libraries_work) - SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) - CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS) - SET(CMAKE_REQUIRED_LIBRARIES) - MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS) - SET(_libraries_work ${${_prefix}${_combined_name}_WORKS}) - ENDIF(_libraries_work) - # Fin - IF(_libraries_work) - ELSE (_libraries_work) - SET(${LIBRARIES}) - MARK_AS_ADVANCED(${LIBRARIES}) - ENDIF(_libraries_work) -ENDMACRO(CHECK_ALL_LIBRARIES) - -if(WIN32) - set(mkl_m "") -else(WIN32) - set(mkl_m "m") -endif(WIN32) - - -# Check for version 10/11 -IF (NOT MKL_LIBRARIES) - SET(MKL_VERSION 1011) -ENDIF (NOT MKL_LIBRARIES) -FOREACH(mklrtl ${mklrtls} "") - FOREACH(mkliface ${mklifaces}) - FOREACH(mkl64 ${mkl64s} "") - FOREACH(mklthread ${mklthreads}) - IF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL) - CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm - "mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m}" "") - ENDIF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL) - ENDFOREACH(mklthread) - ENDFOREACH(mkl64) - ENDFOREACH(mkliface) -ENDFOREACH(mklrtl) -FOREACH(mklrtl ${mklrtls} "") - FOREACH(mkliface ${mklifaces}) - FOREACH(mkl64 ${mkl64s} "") - IF (NOT MKL_LIBRARIES) - CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm - "mkl_${mkliface}${mkl64};mkl_sequential;mkl_core;${mkl_m}" "") - IF (MKL_LIBRARIES) - SET(mklseq "_sequential") - ENDIF (MKL_LIBRARIES) - ENDIF (NOT MKL_LIBRARIES) - ENDFOREACH(mkl64) - ENDFOREACH(mkliface) -ENDFOREACH(mklrtl) -FOREACH(mklrtl ${mklrtls} "") - FOREACH(mkliface ${mklifaces}) - FOREACH(mkl64 ${mkl64s} "") - FOREACH(mklthread ${mklthreads}) - IF (NOT MKL_LIBRARIES) - CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm - "mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m}" "") - ENDIF (NOT MKL_LIBRARIES) - ENDFOREACH(mklthread) - ENDFOREACH(mkl64) - ENDFOREACH(mkliface) -ENDFOREACH(mklrtl) - -# Check for older versions -IF (NOT MKL_LIBRARIES) - SET(MKL_VERSION 900) - 
CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm - "mkl;guide;pthread;m" "") -ENDIF (NOT MKL_LIBRARIES) - -# Include files -IF (MKL_LIBRARIES) - FIND_PATH(MKL_INCLUDE_DIR "mkl_cblas.h") - MARK_AS_ADVANCED(MKL_INCLUDE_DIR) -ENDIF (MKL_LIBRARIES) - -# Other libraries -IF (MKL_LIBRARIES) - FOREACH(mkl64 ${mkl64s} "_core" "") - FOREACH(mkls ${mklseq} "") - IF (NOT MKL_LAPACK_LIBRARIES) - FIND_LIBRARY(MKL_LAPACK_LIBRARIES NAMES "mkl_lapack${mkl64}${mkls}") - MARK_AS_ADVANCED(MKL_LAPACK_LIBRARIES) - ENDIF (NOT MKL_LAPACK_LIBRARIES) - IF (NOT MKL_SCALAPACK_LIBRARIES) - FIND_LIBRARY(MKL_SCALAPACK_LIBRARIES NAMES "mkl_scalapack${mkl64}${mkls}") - MARK_AS_ADVANCED(MKL_SCALAPACK_LIBRARIES) - ENDIF (NOT MKL_SCALAPACK_LIBRARIES) - IF (NOT MKL_SOLVER_LIBRARIES) - FIND_LIBRARY(MKL_SOLVER_LIBRARIES NAMES "mkl_solver${mkl64}${mkls}") - MARK_AS_ADVANCED(MKL_SOLVER_LIBRARIES) - ENDIF (NOT MKL_SOLVER_LIBRARIES) - IF (NOT MKL_CDFT_LIBRARIES) - FIND_LIBRARY(MKL_CDFT_LIBRARIES NAMES "mkl_cdft${mkl64}${mkls}") - MARK_AS_ADVANCED(MKL_CDFT_LIBRARIES) - ENDIF (NOT MKL_CDFT_LIBRARIES) - ENDFOREACH(mkls) - ENDFOREACH(mkl64) -ENDIF (MKL_LIBRARIES) - -# LibIRC: intel compiler always links this; -# gcc does not; but mkl kernels sometimes need it. 
-IF (MKL_LIBRARIES) - IF (CMAKE_COMPILER_IS_GNUCC) - FIND_LIBRARY(MKL_KERNEL_libirc "irc") - ELSEIF (CMAKE_C_COMPILER_ID AND NOT CMAKE_C_COMPILER_ID STREQUAL "Intel") - FIND_LIBRARY(MKL_KERNEL_libirc "irc") - ENDIF (CMAKE_COMPILER_IS_GNUCC) - MARK_AS_ADVANCED(MKL_KERNEL_libirc) - IF (MKL_KERNEL_libirc) - SET(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_KERNEL_libirc}) - ENDIF (MKL_KERNEL_libirc) -ENDIF (MKL_LIBRARIES) - -# Final -SET(CMAKE_LIBRARY_PATH ${saved_CMAKE_LIBRARY_PATH}) -SET(CMAKE_INCLUDE_PATH ${saved_CMAKE_INCLUDE_PATH}) -IF (MKL_LIBRARIES) - SET(MKL_FOUND TRUE) -ELSE (MKL_LIBRARIES) - SET(MKL_FOUND FALSE) - SET(MKL_VERSION) -ENDIF (MKL_LIBRARIES) - -# Standard termination -IF(NOT MKL_FOUND AND MKL_FIND_REQUIRED) - MESSAGE(FATAL_ERROR "MKL library not found. Please specify library location") -ENDIF(NOT MKL_FOUND AND MKL_FIND_REQUIRED) -IF(NOT MKL_FIND_QUIETLY) - IF(MKL_FOUND) - MESSAGE(STATUS "MKL library found") - ELSE(MKL_FOUND) - MESSAGE(STATUS "MKL library not found") - ENDIF(MKL_FOUND) -ENDIF(NOT MKL_FIND_QUIETLY) - -# Do nothing if MKL_FOUND was set before! 
-ENDIF (NOT MKL_FOUND) - - diff --git a/contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake b/contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake deleted file mode 100644 index a14abe8d4..000000000 --- a/contrib/lua-torch/torch7/lib/TH/cmake/FindSSE.cmake +++ /dev/null @@ -1,125 +0,0 @@ -INCLUDE(CheckCSourceRuns) -INCLUDE(CheckCXXSourceRuns) - -SET(SSE1_CODE " - #include <xmmintrin.h> - - int main() - { - __m128 a; - float vals[4] = {0,0,0,0}; - a = _mm_loadu_ps(vals); - return 0; - }") - -SET(SSE2_CODE " - #include <emmintrin.h> - - int main() - { - __m128d a; - double vals[2] = {0,0}; - a = _mm_loadu_pd(vals); - return 0; - }") - -SET(SSE3_CODE " - #include <pmmintrin.h> - - int main( ) - { - const int vals[4] = {0,0,0,0}; - __m128i a; - a = _mm_lddqu_si128( (const __m128i*)vals ); - return 0; - }") - -SET(SSE4_1_CODE " - #include <smmintrin.h> - - int main () - { - __m128i a = {0,0,0,0}, b = {0,0,0,0}; - __m128i res = _mm_max_epi8(a, b); - - return 0; - } -") - -SET(SSE4_2_CODE " - #include <nmmintrin.h> - - int main() - { - __m128i a = {0,0,0,0}, b = {0,0,0,0}, c = {0,0,0,0}; - c = _mm_cmpgt_epi64(a, b); - return 0; - } -") - -SET(AVX_CODE " - #include <immintrin.h> - - int main() - { - __m256 a; - a = _mm256_set1_ps(0); - return 0; - } -") - -SET(AVX2_CODE " - #include <immintrin.h> - - int main() - { - __m256i a = {0}; - a = _mm256_abs_epi16(a); - return 0; - } -") - -MACRO(CHECK_SSE lang type flags) - SET(__FLAG_I 1) - SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) - FOREACH(__FLAG ${flags}) - IF(NOT ${lang}_${type}_FOUND) - SET(CMAKE_REQUIRED_FLAGS ${__FLAG}) - IF(lang STREQUAL "CXX") - CHECK_CXX_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I}) - ELSE() - CHECK_C_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I}) - ENDIF() - IF(${lang}_HAS_${type}_${__FLAG_I}) - SET(${lang}_${type}_FOUND TRUE CACHE BOOL "${lang} ${type} support") - SET(${lang}_${type}_FLAGS "${__FLAG}" CACHE STRING "${lang} ${type} flags") - ENDIF() - 
MATH(EXPR __FLAG_I "${__FLAG_I}+1") - ENDIF() - ENDFOREACH() - SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) - - IF(NOT ${lang}_${type}_FOUND) - SET(${lang}_${type}_FOUND FALSE CACHE BOOL "${lang} ${type} support") - SET(${lang}_${type}_FLAGS "" CACHE STRING "${lang} ${type} flags") - ENDIF() - - MARK_AS_ADVANCED(${lang}_${type}_FOUND ${lang}_${type}_FLAGS) - -ENDMACRO() - -CHECK_SSE(C "SSE1" " ;-msse;/arch:SSE") -CHECK_SSE(C "SSE2" " ;-msse2;/arch:SSE2") -CHECK_SSE(C "SSE3" " ;-msse3;/arch:SSE3") -CHECK_SSE(C "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4") -CHECK_SSE(C "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4") -CHECK_SSE(C "AVX" " ;-mavx;/arch:AVX") -CHECK_SSE(C "AVX2" " ;-mavx2 -mfma;/arch:AVX2") - -CHECK_SSE(CXX "SSE1" " ;-msse;/arch:SSE") -CHECK_SSE(CXX "SSE2" " ;-msse2;/arch:SSE2") -CHECK_SSE(CXX "SSE3" " ;-msse3;/arch:SSE3") -CHECK_SSE(CXX "SSE4_1" " ;-msse4.1;-msse4;/arch:SSE4") -CHECK_SSE(CXX "SSE4_2" " ;-msse4.2;-msse4;/arch:SSE4") -CHECK_SSE(CXX "AVX" " ;-mavx;/arch:AVX") -CHECK_SSE(CXX "AVX2" " ;-mavx2 -mfma;/arch:AVX2") diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.c b/contrib/lua-torch/torch7/lib/TH/generic/THBlas.c deleted file mode 100644 index b04931f34..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.c +++ /dev/null @@ -1,412 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THBlas.c" -#else - - -#ifdef BLAS_F2C -# define ffloat double -#else -# define ffloat float -#endif - -TH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy); -TH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy); -TH_EXTERNC void dscal_(int *n, double *a, double *x, int *incx); -TH_EXTERNC void sscal_(int *n, float *a, float *x, int *incx); -TH_EXTERNC void dcopy_(int *n, double *x, int *incx, double *y, int *incy); -TH_EXTERNC void scopy_(int *n, float *x, int *incx, float *y, int *incy); -TH_EXTERNC void daxpy_(int *n, double *a, double *x, int *incx, double *y, int *incy); -TH_EXTERNC 
void saxpy_(int *n, float *a, float *x, int *incx, float *y, int *incy); -TH_EXTERNC double ddot_(int *n, double *x, int *incx, double *y, int *incy); -TH_EXTERNC ffloat sdot_(int *n, float *x, int *incx, float *y, int *incy); -TH_EXTERNC void dgemv_(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy); -TH_EXTERNC void sgemv_(char *trans, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy); -TH_EXTERNC void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *a, int *lda); -TH_EXTERNC void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *a, int *lda); -TH_EXTERNC void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc); -TH_EXTERNC void sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc); - - - -void THBlas_(swap)(long n, real *x, long incx, real *y, long incy) -{ - if(n == 1) - { - incx = 1; - incy = 1; - } - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) - { - int i_n = (int)n; - int i_incx = (int)incx; - int i_incy = (int)incy; - -#if defined(TH_REAL_IS_DOUBLE) - dswap_(&i_n, x, &i_incx, y, &i_incy); -#else - sswap_(&i_n, x, &i_incx, y, &i_incy); -#endif - return; - } -#endif - { - long i; - for(i = 0; i < n; i++) - { - real z = x[i*incx]; - x[i*incx] = y[i*incy]; - y[i*incy] = z; - } - } -} - -void THBlas_(scal)(long n, real a, real *x, long incx) -{ - if(n == 1) - incx = 1; - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (n <= INT_MAX) && (incx <= INT_MAX) ) - { - int i_n = (int)n; - int i_incx = (int)incx; - -#if 
defined(TH_REAL_IS_DOUBLE) - dscal_(&i_n, &a, x, &i_incx); -#else - sscal_(&i_n, &a, x, &i_incx); -#endif - return; - } -#endif - { - long i; - for(i = 0; i < n; i++) { - if (a == 0) { - x[i*incx] = 0; - } else { - x[i*incx] *= a; - } - } - } -} - -void THBlas_(copy)(long n, real *x, long incx, real *y, long incy) -{ - if(n == 1) - { - incx = 1; - incy = 1; - } - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) - { - int i_n = (int)n; - int i_incx = (int)incx; - int i_incy = (int)incy; - -#if defined(TH_REAL_IS_DOUBLE) - dcopy_(&i_n, x, &i_incx, y, &i_incy); -#else - scopy_(&i_n, x, &i_incx, y, &i_incy); -#endif - return; - } -#endif - { - long i; - for(i = 0; i < n; i++) - y[i*incy] = x[i*incx]; - } -} - -void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy) -{ - if(n == 1) - { - incx = 1; - incy = 1; - } - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) - { - int i_n = (int)n; - int i_incx = (int)incx; - int i_incy = (int)incy; - -#if defined(TH_REAL_IS_DOUBLE) - daxpy_(&i_n, &a, x, &i_incx, y, &i_incy); -#else - saxpy_(&i_n, &a, x, &i_incx, y, &i_incy); -#endif - return; - } -#endif - { - long i; - for(i = 0; i < n; i++) - y[i*incy] += a*x[i*incx]; - } -} - -real THBlas_(dot)(long n, real *x, long incx, real *y, long incy) -{ - if(n == 1) - { - incx = 1; - incy = 1; - } - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) - { - int i_n = (int)n; - int i_incx = (int)incx; - int i_incy = (int)incy; - -#if defined(TH_REAL_IS_DOUBLE) - return (real) ddot_(&i_n, x, &i_incx, y, &i_incy); -#else - return (real) sdot_(&i_n, x, &i_incx, y, &i_incy); -#endif - } -#endif - { - long i; - real sum = 0; - for(i = 0; i < n; i++) - sum += x[i*incx]*y[i*incy]; - 
return sum; - } -} - -void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy) -{ - if(n == 1) - lda = m; - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (m <= INT_MAX) && (n <= INT_MAX) && - (lda > 0) && (lda <= INT_MAX) && - (incx > 0) && (incx <= INT_MAX) && - (incy > 0) && (incy <= INT_MAX) ) - { - int i_m = (int)m; - int i_n = (int)n; - int i_lda = (int)lda; - int i_incx = (int)incx; - int i_incy = (int)incy; - -#if defined(TH_REAL_IS_DOUBLE) - dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy); -#else - sgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy); -#endif - return; - } -#endif - { - long i, j; - - if( (trans == 'T') || (trans == 't') ) - { - for(i = 0; i < n; i++) - { - real sum = 0; - real *row_ = a+lda*i; - for(j = 0; j < m; j++) - sum += x[j*incx]*row_[j]; - if (beta == 0) - y[i*incy] = alpha*sum; - else - y[i*incy] = beta*y[i*incy] + alpha*sum; - } - } - else - { - if(beta != 1) - THBlas_(scal)(m, beta, y, incy); - - for(j = 0; j < n; j++) - { - real *column_ = a+lda*j; - real z = alpha*x[j*incx]; - for(i = 0; i < m; i++) - y[i*incy] += z*column_[i]; - } - } - } -} - -void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda) -{ - if(n == 1) - lda = m; - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) ) - { - int i_m = (int)m; - int i_n = (int)n; - int i_lda = (int)lda; - int i_incx = (int)incx; - int i_incy = (int)incy; - -#if defined(TH_REAL_IS_DOUBLE) - dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda); -#else - sger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda); -#endif - return; - } -#endif - { - long i, j; - for(j = 0; j < n; j++) - { - real *column_ = a+j*lda; - real z = 
alpha*y[j*incy]; - for(i = 0; i < m; i++) - column_[i] += z*x[i*incx] ; - } - } -} - -void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc) -{ - int transa_ = ((transa == 't') || (transa == 'T')); - int transb_ = ((transb == 't') || (transb == 'T')); - - if(n == 1) - ldc = m; - - if(transa_) - { - if(m == 1) - lda = k; - } - else - { - if(k == 1) - lda = m; - } - - if(transb_) - { - if(k == 1) - ldb = n; - } - else - { - if(n == 1) - ldb = k; - } - -#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)) - if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= INT_MAX) ) - { - int i_m = (int)m; - int i_n = (int)n; - int i_k = (int)k; - int i_lda = (int)lda; - int i_ldb = (int)ldb; - int i_ldc = (int)ldc; - -#if defined(TH_REAL_IS_DOUBLE) - dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc); -#else - sgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc); -#endif - return; - } -#endif - { - long i, j, l; - if(!transa_ && !transb_) - { - real *a_ = a; - for(i = 0; i < m; i++) - { - real *b_ = b; - for(j = 0; j < n; j++) - { - real sum = 0; - for(l = 0; l < k; l++) - sum += a_[l*lda]*b_[l]; - b_ += ldb; - if (beta == 0) - c[j*ldc+i] = alpha*sum; - else - c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; - } - a_++; - } - } - else if(transa_ && !transb_) - { - real *a_ = a; - for(i = 0; i < m; i++) - { - real *b_ = b; - for(j = 0; j < n; j++) - { - real sum = 0; - for(l = 0; l < k; l++) - sum += a_[l]*b_[l]; - b_ += ldb; - if (beta == 0) - c[j*ldc+i] = alpha*sum; - else - c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; - } - a_ += lda; - } - } - else if(!transa_ && transb_) - { - real *a_ = a; - for(i = 0; i < m; i++) - { - real *b_ = b; - for(j = 0; j < n; j++) - { - real sum = 0; - for(l = 0; l < k; l++) - sum += a_[l*lda]*b_[l*ldb]; - 
b_++; - if (beta == 0) - c[j*ldc+i] = alpha*sum; - else - c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; - } - a_++; - } - } - else - { - real *a_ = a; - for(i = 0; i < m; i++) - { - real *b_ = b; - for(j = 0; j < n; j++) - { - real sum = 0; - for(l = 0; l < k; l++) - sum += a_[l]*b_[l*ldb]; - b_++; - if (beta == 0) - c[j*ldc+i] = alpha*sum; - else - c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum; - } - a_ += lda; - } - } - } -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.h b/contrib/lua-torch/torch7/lib/TH/generic/THBlas.h deleted file mode 100644 index 9e14f5a84..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THBlas.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THBlas.h" -#else - -/* Level 1 */ -TH_API void THBlas_(swap)(long n, real *x, long incx, real *y, long incy); -TH_API void THBlas_(scal)(long n, real a, real *x, long incx); -TH_API void THBlas_(copy)(long n, real *x, long incx, real *y, long incy); -TH_API void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy); -TH_API real THBlas_(dot)(long n, real *x, long incx, real *y, long incy); - -/* Level 2 */ -TH_API void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy); -TH_API void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda); - -/* Level 3 */ -TH_API void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.c b/contrib/lua-torch/torch7/lib/TH/generic/THLapack.c deleted file mode 100644 index 148ae26c4..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.c +++ /dev/null @@ -1,270 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THLapack.c" -#else - - -TH_EXTERNC void dgesv_(int *n, int *nrhs, double *a, int 
*lda, int *ipiv, double *b, int *ldb, int *info); -TH_EXTERNC void sgesv_(int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info); -TH_EXTERNC void dtrtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); -TH_EXTERNC void strtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info); -TH_EXTERNC void dgels_(char *trans, int *m, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, double *work, int *lwork, int *info); -TH_EXTERNC void sgels_(char *trans, int *m, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, float *work, int *lwork, int *info); -TH_EXTERNC void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info); -TH_EXTERNC void ssyev_(char *jobz, char *uplo, int *n, float *a, int *lda, float *w, float *work, int *lwork, int *info); -TH_EXTERNC void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double* vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info); -TH_EXTERNC void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda, float *wr, float *wi, float* vl, int *ldvl, float *vr, int *ldvr, float *work, int *lwork, int *info); -TH_EXTERNC void dgesvd_(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info); -TH_EXTERNC void sgesvd_(char *jobu, char *jobvt, int *m, int *n, float *a, int *lda, float *s, float *u, int *ldu, float *vt, int *ldvt, float *work, int *lwork, int *info); -TH_EXTERNC void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info); -TH_EXTERNC void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info); -TH_EXTERNC void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info); -TH_EXTERNC void 
sgetrs_(char *trans, int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info); -TH_EXTERNC void dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); -TH_EXTERNC void sgetri_(int *n, float *a, int *lda, int *ipiv, float *work, int *lwork, int *info); -TH_EXTERNC void dpotrf_(char *uplo, int *n, double *a, int *lda, int *info); -TH_EXTERNC void spotrf_(char *uplo, int *n, float *a, int *lda, int *info); -TH_EXTERNC void dpotri_(char *uplo, int *n, double *a, int *lda, int *info); -TH_EXTERNC void spotri_(char *uplo, int *n, float *a, int *lda, int *info); -TH_EXTERNC void dpotrs_(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info); -TH_EXTERNC void spotrs_(char *uplo, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info); -TH_EXTERNC void sgeqrf_(int *m, int *n, float *a, int *lda, float *tau, float *work, int *lwork, int *info); -TH_EXTERNC void dgeqrf_(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info); -TH_EXTERNC void sorgqr_(int *m, int *n, int *k, float *a, int *lda, float *tau, float *work, int *lwork, int *info); -TH_EXTERNC void dorgqr_(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info); -TH_EXTERNC void sormqr_(char *side, char *trans, int *m, int *n, int *k, float *a, int *lda, float *tau, float *c, int *ldc, float *work, int *lwork, int *info); -TH_EXTERNC void dormqr_(char *side, char *trans, int *m, int *n, int *k, double *a, int *lda, double *tau, double *c, int *ldc, double *work, int *lwork, int *info); -TH_EXTERNC void spstrf_(char *uplo, int *n, float *a, int *lda, int *piv, int *rank, float *tol, float *work, int *info); -TH_EXTERNC void dpstrf_(char *uplo, int *n, double *a, int *lda, int *piv, int *rank, double *tol, double *work, int *info); - - -/* Compute the solution to a real system of linear equations A * X = B */ -void THLapack_(gesv)(int n, int nrhs, real 
*a, int lda, int *ipiv, real *b, int ldb, int* info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info); -#else - sgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info); -#endif -#else - THError("gesv : Lapack library not found in compile time\n"); -#endif - return; -} - -/* Solve a triangular system of the form A * X = B or A^T * X = B */ -void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dtrtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info); -#else - strtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info); -#endif -#else - THError("trtrs : Lapack library not found in compile time\n"); -#endif - return; -} - -/* Solve overdetermined or underdetermined real linear systems involving an -M-by-N matrix A, or its transpose, using a QR or LQ factorization of A */ -void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info); -#else - sgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info); -#endif -#else - THError("gels : Lapack library not found in compile time\n"); -#endif -} - -/* Compute all eigenvalues and, optionally, eigenvectors of a real symmetric -matrix A */ -void THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dsyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info); -#else - ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info); -#endif -#else - THError("syev : Lapack library not found in compile time\n"); -#endif -} - -/* Compute for an N-by-N real nonsymmetric matrix A, the eigenvalues and, -optionally, the left and/or right eigenvectors */ -void 
THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info); -#else - sgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info); -#endif -#else - THError("geev : Lapack library not found in compile time\n"); -#endif -} - -/* Compute the singular value decomposition (SVD) of a real M-by-N matrix A, -optionally computing the left and/or right singular vectors */ -void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgesvd_( &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info); -#else - sgesvd_( &jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, info); -#endif -#else - THError("gesvd : Lapack library not found in compile time\n"); -#endif -} - -/* LU decomposition */ -void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgetrf_(&m, &n, a, &lda, ipiv, info); -#else - sgetrf_(&m, &n, a, &lda, ipiv, info); -#endif -#else - THError("getrf : Lapack library not found in compile time\n"); -#endif -} - -void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info); -#else - sgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info); -#endif -#else - THError("getrs : Lapack library not found in compile time\n"); -#endif -} - -/* Matrix Inverse */ -void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info) -{ -#ifdef USE_LAPACK -#if 
defined(TH_REAL_IS_DOUBLE) - dgetri_(&n, a, &lda, ipiv, work, &lwork, info); -#else - sgetri_(&n, a, &lda, ipiv, work, &lwork, info); -#endif -#else - THError("getri : Lapack library not found in compile time\n"); -#endif -} - -/* Cholesky factorization */ -void THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dpotrf_(&uplo, &n, a, &lda, info); -#else - spotrf_(&uplo, &n, a, &lda, info); -#endif -#else - THError("potrf : Lapack library not found in compile time\n"); -#endif -} - -/* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */ -void THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dpotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info); -#else - spotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info); -#endif -#else - THError("potrs: Lapack library not found in compile time\n"); -#endif -} - -/* Cholesky factorization based Matrix Inverse */ -void THLapack_(potri)(char uplo, int n, real *a, int lda, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dpotri_(&uplo, &n, a, &lda, info); -#else - spotri_(&uplo, &n, a, &lda, info); -#endif -#else - THError("potri: Lapack library not found in compile time\n"); -#endif -} - -/* Cholesky factorization with complete pivoting */ -void THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dpstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info); -#else - spstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info); -#endif -#else - THError("pstrf: Lapack library not found at compile time\n"); -#endif -} - -/* QR decomposition */ -void THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dgeqrf_(&m, &n, a, 
&lda, tau, work, &lwork, info); -#else - sgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info); -#endif -#else - THError("geqrf: Lapack library not found in compile time\n"); -#endif -} - -/* Build Q from output of geqrf */ -void THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info); -#else - sorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info); -#endif -#else - THError("orgqr: Lapack library not found in compile time\n"); -#endif -} - -/* Multiply Q with a matrix using the output of geqrf */ -void THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info) -{ -#ifdef USE_LAPACK -#if defined(TH_REAL_IS_DOUBLE) - dormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); -#else - sormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); -#endif -#else - THError("ormqr: Lapack library not found in compile time\n"); -#endif -} - - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.h b/contrib/lua-torch/torch7/lib/TH/generic/THLapack.h deleted file mode 100644 index b464dd2d2..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THLapack.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THLapack.h" -#else - -/* AX=B */ -TH_API void THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info); -/* Solve a triangular system of the form A * X = B or A^T * X = B */ -TH_API void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info); -/* ||AX-B|| */ -TH_API void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info); -/* Eigenvals */ -TH_API void THLapack_(syev)(char jobz, char uplo, 
int n, real *a, int lda, real *w, real *work, int lwork, int *info); -/* Non-sym eigenvals */ -TH_API void THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info); -/* svd */ -TH_API void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info); -/* LU decomposition */ -TH_API void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info); -TH_API void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info); -/* Matrix Inverse */ -TH_API void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info); - -/* Positive Definite matrices */ -/* Cholesky factorization */ -void THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info); -/* Matrix inverse based on Cholesky factorization */ -void THLapack_(potri)(char uplo, int n, real *a, int lda, int *info); -/* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */ -void THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info); -/* Cholesky factorization with complete pivoting. 
*/ -void THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info); - -/* QR decomposition */ -void THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info); -/* Build Q from output of geqrf */ -void THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info); -/* Multiply Q with a matrix from output of geqrf */ -void THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.c b/contrib/lua-torch/torch7/lib/TH/generic/THStorage.c deleted file mode 100644 index a592cfb62..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.c +++ /dev/null @@ -1,226 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THStorage.c" -#else - -real* THStorage_(data)(const THStorage *self) -{ - return self->data; -} - -ptrdiff_t THStorage_(size)(const THStorage *self) -{ - return self->size; -} - -size_t THStorage_(elementSize)() -{ - return sizeof(real); -} - -THStorage* THStorage_(new)(void) -{ - return THStorage_(newWithSize)(0); -} - -THStorage* THStorage_(newWithSize)(ptrdiff_t size) -{ - return THStorage_(newWithAllocator)(size, &THDefaultAllocator, NULL); -} - -THStorage* THStorage_(newWithAllocator)(ptrdiff_t size, - THAllocator *allocator, - void *allocatorContext) -{ - THStorage *storage = THAlloc(sizeof(THStorage)); - storage->data = allocator->malloc(allocatorContext, sizeof(real)*size); - storage->size = size; - storage->refcount = 1; - storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM; - storage->allocator = allocator; - storage->allocatorContext = allocatorContext; - return storage; -} - -THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags) -{ - THMapAllocatorContext *ctx = 
THMapAllocatorContext_new(filename, flags); - - THStorage *storage = THStorage_(newWithAllocator)(size, - &THMapAllocator, - ctx); - - if(size <= 0) - storage->size = THMapAllocatorContext_size(ctx)/sizeof(real); - - THStorage_(clearFlag)(storage, TH_STORAGE_RESIZABLE); - - return storage; -} - -THStorage* THStorage_(newWithSize1)(real data0) -{ - THStorage *self = THStorage_(newWithSize)(1); - self->data[0] = data0; - return self; -} - -THStorage* THStorage_(newWithSize2)(real data0, real data1) -{ - THStorage *self = THStorage_(newWithSize)(2); - self->data[0] = data0; - self->data[1] = data1; - return self; -} - -THStorage* THStorage_(newWithSize3)(real data0, real data1, real data2) -{ - THStorage *self = THStorage_(newWithSize)(3); - self->data[0] = data0; - self->data[1] = data1; - self->data[2] = data2; - return self; -} - -THStorage* THStorage_(newWithSize4)(real data0, real data1, real data2, real data3) -{ - THStorage *self = THStorage_(newWithSize)(4); - self->data[0] = data0; - self->data[1] = data1; - self->data[2] = data2; - self->data[3] = data3; - return self; -} - -void THStorage_(setFlag)(THStorage *storage, const char flag) -{ - storage->flag |= flag; -} - -void THStorage_(clearFlag)(THStorage *storage, const char flag) -{ - storage->flag &= ~flag; -} - -void THStorage_(retain)(THStorage *storage) -{ - if(storage && (storage->flag & TH_STORAGE_REFCOUNTED)) - THAtomicIncrementRef(&storage->refcount); -} - -void THStorage_(free)(THStorage *storage) -{ - if(!storage) - return; - - if((storage->flag & TH_STORAGE_REFCOUNTED) && (THAtomicGet(&storage->refcount) > 0)) - { - if(THAtomicDecrementRef(&storage->refcount)) - { - if(storage->flag & TH_STORAGE_FREEMEM) { - storage->allocator->free(storage->allocatorContext, storage->data); - } - if(storage->flag & TH_STORAGE_VIEW) { - THStorage_(free)(storage->view); - } - THFree(storage); - } - } -} - -THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size) -{ - return 
THStorage_(newWithDataAndAllocator)(data, size, - &THDefaultAllocator, NULL); -} - -THStorage* THStorage_(newWithDataAndAllocator)(real* data, ptrdiff_t size, - THAllocator* allocator, - void* allocatorContext) { - THStorage *storage = THAlloc(sizeof(THStorage)); - storage->data = data; - storage->size = size; - storage->refcount = 1; - storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM; - storage->allocator = allocator; - storage->allocatorContext = allocatorContext; - return storage; -} - -void THStorage_(resize)(THStorage *storage, ptrdiff_t size) -{ - if(storage->flag & TH_STORAGE_RESIZABLE) - { - if(storage->allocator->realloc == NULL) { - /* case when the allocator does not have a realloc defined */ - real *old_data = storage->data; - ptrdiff_t old_size = storage->size; - if (size == 0) { - storage->data = NULL; - } else { - storage->data = storage->allocator->malloc( - storage->allocatorContext, - sizeof(real)*size); - } - storage->size = size; - if (old_data != NULL) { - ptrdiff_t copy_size = old_size; - if (storage->size < copy_size) { - copy_size = storage->size; - } - if (copy_size > 0) { - memcpy(storage->data, old_data, sizeof(real)*copy_size); - } - storage->allocator->free(storage->allocatorContext, old_data); - } - } else { - storage->data = storage->allocator->realloc( - storage->allocatorContext, - storage->data, - sizeof(real)*size); - storage->size = size; - } - } else { - THError("Trying to resize storage that is not resizable"); - } -} - -void THStorage_(fill)(THStorage *storage, real value) -{ - ptrdiff_t i; - for(i = 0; i < storage->size; i++) - storage->data[i] = value; -} - -void THStorage_(set)(THStorage *self, ptrdiff_t idx, real value) -{ - THArgCheck((idx >= 0) && (idx < self->size), 2, "out of bounds"); - self->data[idx] = value; -} - -real THStorage_(get)(const THStorage *self, ptrdiff_t idx) -{ - THArgCheck((idx >= 0) && (idx < self->size), 2, "out of bounds"); - return self->data[idx]; -} - -void 
THStorage_(swap)(THStorage *storage1, THStorage *storage2) -{ -#define SWAP(val) { val = storage1->val; storage1->val = storage2->val; storage2->val = val; } - real *data; - ptrdiff_t size; - char flag; - THAllocator *allocator; - void *allocatorContext; - struct THStorage *view; - - SWAP(data); - SWAP(size); - SWAP(flag); - // don't swap refcount! - SWAP(allocator); - SWAP(allocatorContext); - SWAP(view); -#undef SWAP -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.h b/contrib/lua-torch/torch7/lib/TH/generic/THStorage.h deleted file mode 100644 index 3dd214b33..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THStorage.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THStorage.h" -#else - -/* on pourrait avoir un liste chainee - qui initialise math, lab structures (or more). - mouais -- complique. - - Pb: THMapStorage is kind of a class - THLab_()... comment je m'en sors? - - en template, faudrait que je les instancie toutes!!! oh boy! - Et comment je sais que c'est pour Cuda? Le type float est le meme dans les <> - - au bout du compte, ca serait sur des pointeurs float/double... etc... = facile. - primitives?? 
- */ - -#define TH_STORAGE_REFCOUNTED 1 -#define TH_STORAGE_RESIZABLE 2 -#define TH_STORAGE_FREEMEM 4 -#define TH_STORAGE_VIEW 8 - -typedef struct THStorage -{ - real *data; - ptrdiff_t size; - int refcount; - char flag; - THAllocator *allocator; - void *allocatorContext; - struct THStorage *view; -} THStorage; - -TH_API real* THStorage_(data)(const THStorage*); -TH_API ptrdiff_t THStorage_(size)(const THStorage*); -TH_API size_t THStorage_(elementSize)(void); - -/* slow access -- checks everything */ -TH_API void THStorage_(set)(THStorage*, ptrdiff_t, real); -TH_API real THStorage_(get)(const THStorage*, ptrdiff_t); - -TH_API THStorage* THStorage_(new)(void); -TH_API THStorage* THStorage_(newWithSize)(ptrdiff_t size); -TH_API THStorage* THStorage_(newWithSize1)(real); -TH_API THStorage* THStorage_(newWithSize2)(real, real); -TH_API THStorage* THStorage_(newWithSize3)(real, real, real); -TH_API THStorage* THStorage_(newWithSize4)(real, real, real, real); -TH_API THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags); - -/* takes ownership of data */ -TH_API THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size); - -TH_API THStorage* THStorage_(newWithAllocator)(ptrdiff_t size, - THAllocator* allocator, - void *allocatorContext); -TH_API THStorage* THStorage_(newWithDataAndAllocator)( - real* data, ptrdiff_t size, THAllocator* allocator, void *allocatorContext); - -/* should not differ with API */ -TH_API void THStorage_(setFlag)(THStorage *storage, const char flag); -TH_API void THStorage_(clearFlag)(THStorage *storage, const char flag); -TH_API void THStorage_(retain)(THStorage *storage); -TH_API void THStorage_(swap)(THStorage *storage1, THStorage *storage2); - -/* might differ with other API (like CUDA) */ -TH_API void THStorage_(free)(THStorage *storage); -TH_API void THStorage_(resize)(THStorage *storage, ptrdiff_t size); -TH_API void THStorage_(fill)(THStorage *storage, real value); - -#endif diff --git 
a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c b/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c deleted file mode 100644 index ce4b57eaf..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.c +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THStorageCopy.c" -#else - -void THStorage_(rawCopy)(THStorage *storage, real *src) -{ - ptrdiff_t i; - for(i = 0; i < storage->size; i++) - storage->data[i] = src[i]; -} - -void THStorage_(copy)(THStorage *storage, THStorage *src) -{ - THArgCheck(storage->size == src->size, 2, "size mismatch"); - THStorage_(rawCopy)(storage, src->data); -} - -#define IMPLEMENT_THStorage_COPY(TYPENAMESRC) \ -void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ -{ \ - ptrdiff_t i; \ - for(i = 0; i < storage->size; i++) \ - storage->data[i] = (real)src->data[i]; \ -} - -#define IMPLEMENT_THStorage_COPY_FROM_HALF(TYPENAMESRC) \ -void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ -{ \ - THArgCheck(storage->size == src->size, 2, "size mismatch"); \ - ptrdiff_t i; \ - for(i = 0; i < storage->size; i++) \ - storage->data[i] = (real)TH_half2float(src->data[i]); \ -} - -#define IMPLEMENT_THStorage_COPY_TO_HALF(TYPENAMESRC) \ -void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ -{ \ - THArgCheck(storage->size == src->size, 2, "size mismatch"); \ - ptrdiff_t i; \ - for(i = 0; i < storage->size; i++) \ - storage->data[i] = TH_float2half((float)(src->data[i])); \ -} - -#define IMPLEMENT_THStorage_COPY_TO_FROM_HALF(TYPENAMESRC) \ -void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \ -{ \ - THArgCheck(storage->size == src->size, 2, "size mismatch"); \ - ptrdiff_t i; \ - for(i = 0; i < storage->size; i++) \ - storage->data[i] = src->data[i]; \ -} - -#ifndef TH_REAL_IS_HALF -IMPLEMENT_THStorage_COPY(Byte) -IMPLEMENT_THStorage_COPY(Char) 
-IMPLEMENT_THStorage_COPY(Short) -IMPLEMENT_THStorage_COPY(Int) -IMPLEMENT_THStorage_COPY(Long) -IMPLEMENT_THStorage_COPY(Float) -IMPLEMENT_THStorage_COPY(Double) -IMPLEMENT_THStorage_COPY_FROM_HALF(Half) -#else -/* only allow pass-through for Half */ -IMPLEMENT_THStorage_COPY_TO_FROM_HALF(Half) -IMPLEMENT_THStorage_COPY_TO_HALF(Byte) -IMPLEMENT_THStorage_COPY_TO_HALF(Char) -IMPLEMENT_THStorage_COPY_TO_HALF(Short) -IMPLEMENT_THStorage_COPY_TO_HALF(Int) -IMPLEMENT_THStorage_COPY_TO_HALF(Long) -IMPLEMENT_THStorage_COPY_TO_HALF(Float) -IMPLEMENT_THStorage_COPY_TO_HALF(Double) -#endif - - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h b/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h deleted file mode 100644 index ce8a2a690..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THStorageCopy.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THStorageCopy.h" -#else - -/* Support for copy between different Storage types */ - -TH_API void THStorage_(rawCopy)(THStorage *storage, real *src); -TH_API void THStorage_(copy)(THStorage *storage, THStorage *src); -TH_API void THStorage_(copyByte)(THStorage *storage, struct THByteStorage *src); -TH_API void THStorage_(copyChar)(THStorage *storage, struct THCharStorage *src); -TH_API void THStorage_(copyShort)(THStorage *storage, struct THShortStorage *src); -TH_API void THStorage_(copyInt)(THStorage *storage, struct THIntStorage *src); -TH_API void THStorage_(copyLong)(THStorage *storage, struct THLongStorage *src); -TH_API void THStorage_(copyFloat)(THStorage *storage, struct THFloatStorage *src); -TH_API void THStorage_(copyDouble)(THStorage *storage, struct THDoubleStorage *src); -TH_API void THStorage_(copyHalf)(THStorage *storage, struct THHalfStorage *src); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensor.c deleted file mode 100644 index e44e06ec3..000000000 --- 
a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.c +++ /dev/null @@ -1,939 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensor.c" -#else - -/**** access methods ****/ -THStorage *THTensor_(storage)(const THTensor *self) -{ - return self->storage; -} - -ptrdiff_t THTensor_(storageOffset)(const THTensor *self) -{ - return self->storageOffset; -} - -int THTensor_(nDimension)(const THTensor *self) -{ - return self->nDimension; -} - -long THTensor_(size)(const THTensor *self, int dim) -{ - THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "dimension %d out of range of %dD tensor", - dim+TH_INDEX_BASE, THTensor_(nDimension)(self)); - return self->size[dim]; -} - -long THTensor_(stride)(const THTensor *self, int dim) -{ - THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "dimension %d out of range of %dD tensor", - dim+TH_INDEX_BASE, THTensor_(nDimension)(self)); - return self->stride[dim]; -} - -THLongStorage *THTensor_(newSizeOf)(THTensor *self) -{ - THLongStorage *size = THLongStorage_newWithSize(self->nDimension); - THLongStorage_rawCopy(size, self->size); - return size; -} - -THLongStorage *THTensor_(newStrideOf)(THTensor *self) -{ - THLongStorage *stride = THLongStorage_newWithSize(self->nDimension); - THLongStorage_rawCopy(stride, self->stride); - return stride; -} - -real *THTensor_(data)(const THTensor *self) -{ - if(self->storage) - return (self->storage->data+self->storageOffset); - else - return NULL; -} - -void THTensor_(setFlag)(THTensor *self, const char flag) -{ - self->flag |= flag; -} - -void THTensor_(clearFlag)(THTensor *self, const char flag) -{ - self->flag &= ~flag; -} - -/**** creation methods ****/ - -static void THTensor_(rawInit)(THTensor *self); - - -/* Empty init */ -THTensor *THTensor_(new)(void) -{ - THTensor *self = THAlloc(sizeof(THTensor)); - THTensor_(rawInit)(self); - return self; -} - -/* Pointer-copy init */ -THTensor *THTensor_(newWithTensor)(THTensor *tensor) -{ - THTensor *self = 
THAlloc(sizeof(THTensor)); - THTensor_(rawInit)(self); - THTensor_(setStorageNd)(self, - tensor->storage, - tensor->storageOffset, - tensor->nDimension, - tensor->size, - tensor->stride); - return self; -} - -/* Storage init */ -THTensor *THTensor_(newWithStorage)(THStorage *storage, ptrdiff_t storageOffset, THLongStorage *size, THLongStorage *stride) -{ - THTensor *self = THAlloc(sizeof(THTensor)); - if(size && stride) - THArgCheck(size->size == stride->size, 4, "inconsistent size"); - - THTensor_(rawInit)(self); -#ifdef DEBUG - THAssert((size ? size->size : (stride ? stride->size : 0)) <= INT_MAX); -#endif - THTensor_(setStorageNd)(self, - storage, - storageOffset, - (size ? size->size : (stride ? stride->size : 0)), - (size ? size->data : NULL), - (stride ? stride->data : NULL)); - - return self; -} -THTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset, - long size0, long stride0) -{ - return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1); -} - -THTensor *THTensor_(newWithStorage2d)(THStorage *storage, ptrdiff_t storageOffset, - long size0, long stride0, - long size1, long stride1) -{ - return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1); -} - -THTensor *THTensor_(newWithStorage3d)(THStorage *storage, ptrdiff_t storageOffset, - long size0, long stride0, - long size1, long stride1, - long size2, long stride2) -{ - return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1); -} - -THTensor *THTensor_(newWithStorage4d)(THStorage *storage, ptrdiff_t storageOffset, - long size0, long stride0, - long size1, long stride1, - long size2, long stride2, - long size3, long stride3) -{ - long size[4] = {size0, size1, size2, size3}; - long stride[4] = {stride0, stride1, stride2, stride3}; - - THTensor *self = THAlloc(sizeof(THTensor)); - THTensor_(rawInit)(self); - THTensor_(setStorageNd)(self, 
storage, storageOffset, 4, size, stride); - - return self; -} - -THTensor *THTensor_(newWithSize)(THLongStorage *size, THLongStorage *stride) -{ - return THTensor_(newWithStorage)(NULL, 0, size, stride); -} - -THTensor *THTensor_(newWithSize1d)(long size0) -{ - return THTensor_(newWithSize4d)(size0, -1, -1, -1); -} - -THTensor *THTensor_(newWithSize2d)(long size0, long size1) -{ - return THTensor_(newWithSize4d)(size0, size1, -1, -1); -} - -THTensor *THTensor_(newWithSize3d)(long size0, long size1, long size2) -{ - return THTensor_(newWithSize4d)(size0, size1, size2, -1); -} - -THTensor *THTensor_(newWithSize4d)(long size0, long size1, long size2, long size3) -{ - long size[4] = {size0, size1, size2, size3}; - - THTensor *self = THAlloc(sizeof(THTensor)); - THTensor_(rawInit)(self); - THTensor_(resizeNd)(self, 4, size, NULL); - - return self; -} - -THTensor *THTensor_(newClone)(THTensor *self) -{ - THTensor *tensor = THTensor_(new)(); - THTensor_(resizeAs)(tensor, self); - THTensor_(copy)(tensor, self); - return tensor; -} - -THTensor *THTensor_(newContiguous)(THTensor *self) -{ - if(!THTensor_(isContiguous)(self)) - return THTensor_(newClone)(self); - else - { - THTensor_(retain)(self); - return self; - } -} - -THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_) -{ - THTensor *self = THTensor_(newWithTensor)(tensor); - THTensor_(select)(self, NULL, dimension_, sliceIndex_); - return self; -} - -THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_) -{ - THTensor *self = THTensor_(newWithTensor)(tensor); - THTensor_(narrow)(self, NULL, dimension_, firstIndex_, size_); - return self; -} - -THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_) -{ - THTensor *self = THTensor_(newWithTensor)(tensor); - THTensor_(transpose)(self, NULL, dimension1_, dimension2_); - return self; -} - -THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long 
step_) -{ - THTensor *self = THTensor_(newWithTensor)(tensor); - THTensor_(unfold)(self, NULL, dimension_, size_, step_); - return self; -} - -THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size) -{ - THArgCheck(THTensor_(isContiguous)(tensor), 1, "input is not contiguous"); - ptrdiff_t numel = THTensor_(nElement)(tensor); - THTensor *self = THTensor_(new)(); - THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel); - THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL); - THLongStorage_free(inferred_size); - return self; -} - -/* Resize */ -void THTensor_(resize)(THTensor *self, THLongStorage *size, THLongStorage *stride) -{ - THArgCheck(size != NULL, 2, "invalid size"); - if(stride) - THArgCheck(stride->size == size->size, 3, "invalid stride"); - -#ifdef DEBUG - THAssert(size->size <= INT_MAX); -#endif - THTensor_(resizeNd)(self, size->size, size->data, (stride ? stride->data : NULL)); -} - -void THTensor_(resizeAs)(THTensor *self, THTensor *src) -{ - if(!THTensor_(isSameSizeAs)(self, src)) - THTensor_(resizeNd)(self, src->nDimension, src->size, NULL); -} - -void THTensor_(resize1d)(THTensor *tensor, long size0) -{ - THTensor_(resize4d)(tensor, size0, -1, -1, -1); -} - -void THTensor_(resize2d)(THTensor *tensor, long size0, long size1) -{ - THTensor_(resize4d)(tensor, size0, size1, -1, -1); -} - -void THTensor_(resize3d)(THTensor *tensor, long size0, long size1, long size2) -{ - THTensor_(resize4d)(tensor, size0, size1, size2, -1); -} - -void THTensor_(resize4d)(THTensor *self, long size0, long size1, long size2, long size3) -{ - long size[4] = {size0, size1, size2, size3}; - - THTensor_(resizeNd)(self, 4, size, NULL); -} - -void THTensor_(resize5d)(THTensor *self, long size0, long size1, long size2, long size3, long size4) -{ - long size[5] = {size0, size1, size2, size3, size4}; - - THTensor_(resizeNd)(self, 5, size, NULL); -} - -THTensor* THTensor_(newExpand)(THTensor *tensor, THLongStorage 
*sizes) { - THTensor *result = THTensor_(new)(); - THTensor_(expand)(result, tensor, sizes); - return result; -} - -void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *sizes) { - THArgCheck(THTensor_(nDimension)(tensor) > 0, 0, "can't expand an empty tensor"); - THArgCheck(THLongStorage_size(sizes) >= THTensor_(nDimension)(tensor), 1, - "the number of sizes provided must be greater or equal to the " - "number of dimensions in the tensor"); - - long *expandedSizes; - long *expandedStrides; - char error_buffer[1024]; - int ret = - THLongStorage_inferExpandGeometry(tensor->size, tensor->stride, THTensor_(nDimension)(tensor), - sizes, &expandedSizes, &expandedStrides, error_buffer, 1024); - - if (ret != 0) { - THError(error_buffer); - return; - } - - THTensor_(setStorageNd)(r, THTensor_(storage)(tensor), THTensor_(storageOffset)(tensor), - THLongStorage_size(sizes), expandedSizes, expandedStrides); - THFree(expandedSizes); - THFree(expandedStrides); -} - - -void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count) { - for (int i = 0; i < count; ++i) { - THArgCheck(THTensor_(nDimension)(ops[i]) > 0, i, "can't expand empty tensor %d", i); - } - - long *op_sizes[count]; - long op_dims[count]; - - for (int i = 0; i < count; ++i) { - op_sizes[i] = ops[i]->size; - op_dims[i] = ops[i]->nDimension; - } - - THLongStorage *sizes = THLongStorage_new(); - char error_buffer[1024]; - int ret = THLongStorage_inferSizeN(sizes, - count, - op_sizes, - op_dims, - error_buffer, - 1024); - - if(ret != 0) { - THLongStorage_free(sizes); - THError(error_buffer); - return; - } - - for (int i = 0; i < count; ++i) { - THTensor_(expand)(rets[i], ops[i], sizes); - } - - THLongStorage_free(sizes); -} - -void THTensor_(set)(THTensor *self, THTensor *src) -{ - if(self != src) - THTensor_(setStorageNd)(self, - src->storage, - src->storageOffset, - src->nDimension, - src->size, - src->stride); -} - -void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t 
storageOffset_, THLongStorage *size_, THLongStorage *stride_) -{ - if(size_ && stride_) - THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes"); - -#ifdef DEBUG - THAssert((size_ ? size_->size : (stride_ ? stride_->size : 0)) <= INT_MAX); -#endif - THTensor_(setStorageNd)(self, - storage_, - storageOffset_, - (size_ ? size_->size : (stride_ ? stride_->size : 0)), - (size_ ? size_->data : NULL), - (stride_ ? stride_->data : NULL)); -} - -void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_) -{ - THTensor_(setStorage4d)(self, storage_, storageOffset_, - size0_, stride0_, - -1, -1, - -1, -1, - -1, -1); -} - -void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_) -{ - THTensor_(setStorage4d)(self, storage_, storageOffset_, - size0_, stride0_, - size1_, stride1_, - -1, -1, - -1, -1); -} - -void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_, - long size2_, long stride2_) -{ - THTensor_(setStorage4d)(self, storage_, storageOffset_, - size0_, stride0_, - size1_, stride1_, - size2_, stride2_, - -1, -1); -} - -void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_, - long size2_, long stride2_, - long size3_, long stride3_) -{ - - long size[4] = {size0_, size1_, size2_, size3_}; - long stride[4] = {stride0_, stride1_, stride2_, stride3_}; - - THTensor_(setStorageNd)(self, storage_, storageOffset_, 4, size, stride); -} - - -void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension, long firstIndex, long size) -{ - if(!src) - src = self; - - THArgCheck( (dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); - THArgCheck( (firstIndex >= 0) && (firstIndex < 
src->size[dimension]), 3, "out of range"); - THArgCheck( (size > 0) && (firstIndex <= src->size[dimension] - size), 4, "out of range"); - - THTensor_(set)(self, src); - - if(firstIndex > 0) - self->storageOffset += firstIndex*self->stride[dimension]; - - self->size[dimension] = size; -} - -void THTensor_(select)(THTensor *self, THTensor *src, int dimension, long sliceIndex) -{ - int d; - - if(!src) - src = self; - - THArgCheck(src->nDimension > 1, 1, "cannot select on a vector"); - THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); - THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 3, "out of range"); - - THTensor_(set)(self, src); - THTensor_(narrow)(self, NULL, dimension, sliceIndex, 1); - for(d = dimension; d < self->nDimension-1; d++) - { - self->size[d] = self->size[d+1]; - self->stride[d] = self->stride[d+1]; - } - self->nDimension--; -} - -void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1, int dimension2) -{ - long z; - - if(!src) - src = self; - - THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range"); - THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range"); - - THTensor_(set)(self, src); - - if(dimension1 == dimension2) - return; - - z = self->stride[dimension1]; - self->stride[dimension1] = self->stride[dimension2]; - self->stride[dimension2] = z; - z = self->size[dimension1]; - self->size[dimension1] = self->size[dimension2]; - self->size[dimension2] = z; -} - -void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension, long size, long step) -{ - long *newSize; - long *newStride; - int d; - - if(!src) - src = self; - - THArgCheck( (src->nDimension > 0), 1, "cannot unfold an empty tensor"); - THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "out of range"); - THArgCheck(size <= src->size[dimension], 3, "out of range"); - THArgCheck(step > 0, 4, "invalid step"); - - THTensor_(set)(self, src); - - 
newSize = THAlloc(sizeof(long)*(self->nDimension+1)); - newStride = THAlloc(sizeof(long)*(self->nDimension+1)); - - newSize[self->nDimension] = size; - newStride[self->nDimension] = self->stride[dimension]; - for(d = 0; d < self->nDimension; d++) - { - if(d == dimension) - { - newSize[d] = (self->size[d] - size) / step + 1; - newStride[d] = step*self->stride[d]; - } - else - { - newSize[d] = self->size[d]; - newStride[d] = self->stride[d]; - } - } - - THFree(self->size); - THFree(self->stride); - - self->size = newSize; - self->stride = newStride; - self->nDimension++; -} - -/* we have to handle the case where the result is a number */ -void THTensor_(squeeze)(THTensor *self, THTensor *src) -{ - int ndim = 0; - int d; - - if(!src) - src = self; - - THTensor_(set)(self, src); - - for(d = 0; d < src->nDimension; d++) - { - if(src->size[d] != 1) - { - if(d != ndim) - { - self->size[ndim] = src->size[d]; - self->stride[ndim] = src->stride[d]; - } - ndim++; - } - } - - /* right now, we do not handle 0-dimension tensors */ - if(ndim == 0 && src->nDimension > 0) - { - self->size[0] = 1; - self->stride[0] = 1; - ndim = 1; - } - self->nDimension = ndim; -} - -void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension) -{ - int d; - - if(!src) - src = self; - - THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, "dimension out of range"); - - THTensor_(set)(self, src); - - if(src->size[dimension] == 1 && src->nDimension > 1) - { - for(d = dimension; d < self->nDimension-1; d++) - { - self->size[d] = self->size[d+1]; - self->stride[d] = self->stride[d+1]; - } - self->nDimension--; - } -} - -void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension) -{ - int d; - - if(!src) - src = self; - - THArgCheck((dimension >= 0) && (dimension <= src->nDimension), 2, "dimension out of range"); - THArgCheck(src->nDimension > 0, 2, "cannot unsqueeze empty tensor"); - - THTensor_(set)(self, src); - - self->size = (long*)THRealloc(self->size, 
sizeof(long)*(self->nDimension+1)); - self->stride = (long*)THRealloc(self->stride, sizeof(long)*(self->nDimension+1)); - self->nDimension++; - for (d = self->nDimension-1; d > dimension; d--) { - self->size[d] = self->size[d-1]; - self->stride[d] = self->stride[d-1]; - } - if (dimension+1 < self->nDimension) { - self->stride[dimension] = self->size[dimension+1] * self->stride[dimension+1]; - } else { - self->stride[dimension] = 1; - } - self->size[dimension] = 1; -} - -int THTensor_(isTransposed)(const THTensor *self) -{ - if (THTensor_(isContiguous)(self)) { - return 0; - } - long max_stride = 1; - long size_max_stride = 1; - long z = 1; - int d; - for (d = 0; d < self->nDimension; ++d) { - if (self->stride[d] == 0 && self->size[d] != 1) - return 0; - if (self->stride[d] > max_stride) { - max_stride = self->stride[d]; - size_max_stride = self->size[d]; - } - z *= self->size[d]; - } - if (z == max_stride * size_max_stride) { - return 1; - } - return 0; -} - -int THTensor_(isContiguous)(const THTensor *self) -{ - long z = 1; - int d; - for(d = self->nDimension-1; d >= 0; d--) - { - if(self->size[d] != 1) - { - if(self->stride[d] == z) - z *= self->size[d]; - else - return 0; - } - } - return 1; -} - -int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims) -{ - int d; - if (self->nDimension != dims->size) - return 0; - - for(d = 0; d < self->nDimension; ++d) - { - if(self->size[d] != dims->data[d]) - return 0; - } - return 1; -} - -int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor* src) -{ - int d; - if (self->nDimension != src->nDimension) - return 0; - for(d = 0; d < self->nDimension; ++d) - { - if(self->size[d] != src->size[d]) - return 0; - } - return 1; -} - -int THTensor_(isSetTo)(const THTensor *self, const THTensor* src) -{ - if (!self->storage) - return 0; - if (self->storage == src->storage && - self->storageOffset == src->storageOffset && - self->nDimension == src->nDimension) - { - int d; - for (d = 0; d < self->nDimension; 
++d) - { - if (self->size[d] != src->size[d] || self->stride[d] != src->stride[d]) - return 0; - } - return 1; - } - return 0; -} - -ptrdiff_t THTensor_(nElement)(const THTensor *self) -{ - if(self->nDimension == 0) - return 0; - else - { - ptrdiff_t nElement = 1; - int d; - for(d = 0; d < self->nDimension; d++) - nElement *= self->size[d]; - return nElement; - } -} - -void THTensor_(retain)(THTensor *self) -{ - if(self->flag & TH_TENSOR_REFCOUNTED) - THAtomicIncrementRef(&self->refcount); -} - -void THTensor_(free)(THTensor *self) -{ - if(!self) - return; - - if(self->flag & TH_TENSOR_REFCOUNTED) - { - if(THAtomicDecrementRef(&self->refcount)) - { - THFree(self->size); - THFree(self->stride); - if(self->storage) - THStorage_(free)(self->storage); - THFree(self); - } - } -} - -void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst) -{ - if(self != dst) - THTensor_(copy)(dst, self); - - THTensor_(free)(self); -} - -/*******************************************************************************/ - -static void THTensor_(rawInit)(THTensor *self) -{ - self->refcount = 1; - self->storage = NULL; - self->storageOffset = 0; - self->size = NULL; - self->stride = NULL; - self->nDimension = 0; - self->flag = TH_TENSOR_REFCOUNTED; -} - -void THTensor_(setStorageNd)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride) -{ - /* storage */ - if(self->storage != storage) - { - if(self->storage) - THStorage_(free)(self->storage); - - if(storage) - { - self->storage = storage; - THStorage_(retain)(self->storage); - } - else - self->storage = NULL; - } - - /* storageOffset */ - if(storageOffset < 0) - THError("Tensor: invalid storage offset"); - self->storageOffset = storageOffset; - - /* size and stride */ - THTensor_(resizeNd)(self, nDimension, size, stride); -} - -void THTensor_(resizeNd)(THTensor *self, int nDimension, long *size, long *stride) -{ - int d; - int nDimension_; - ptrdiff_t totalSize; - int hascorrectsize = 1; - - 
nDimension_ = 0; - for(d = 0; d < nDimension; d++) - { - if(size[d] > 0) - { - nDimension_++; - if((self->nDimension > d) && (size[d] != self->size[d])) - hascorrectsize = 0; - - if((self->nDimension > d) && stride && (stride[d] >= 0) && (stride[d] != self->stride[d])) - hascorrectsize = 0; - } - else - break; - } - nDimension = nDimension_; - - if(nDimension != self->nDimension) - hascorrectsize = 0; - - if(hascorrectsize) - return; - - if(nDimension > 0) - { - if(nDimension != self->nDimension) - { - self->size = THRealloc(self->size, sizeof(long)*nDimension); - self->stride = THRealloc(self->stride, sizeof(long)*nDimension); - self->nDimension = nDimension; - } - - totalSize = 1; - for(d = self->nDimension-1; d >= 0; d--) - { - self->size[d] = size[d]; - if(stride && (stride[d] >= 0) ) - self->stride[d] = stride[d]; - else - { - if(d == self->nDimension-1) - self->stride[d] = 1; - else - self->stride[d] = self->size[d+1]*self->stride[d+1]; - } - totalSize += (self->size[d]-1)*self->stride[d]; - } - - if(totalSize+self->storageOffset > 0) - { - if(!self->storage) - self->storage = THStorage_(new)(); - if(totalSize+self->storageOffset > self->storage->size) - THStorage_(resize)(self->storage, totalSize+self->storageOffset); - } - } - else - self->nDimension = 0; -} - -void THTensor_(set1d)(THTensor *tensor, long x0, real value) -{ - THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); - THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); - THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value); -} - -real THTensor_(get1d)(const THTensor *tensor, long x0) -{ - THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); - THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); - return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]); -} - -void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value) -{ - THArgCheck(tensor->nDimension == 
2, 1, "tensor must have two dimensions"); - THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); - THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value); -} - -real THTensor_(get2d)(const THTensor *tensor, long x0, long x1) -{ - THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); - THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); - return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]); -} - -void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value) -{ - THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); - THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); - THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value); -} - -real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2) -{ - THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); - THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); - return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]); -} - -void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value) -{ - THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); - THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); - THStorage_(set)(tensor->storage, 
tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value); -} - -real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3) -{ - THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); - THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); - return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]); -} - -THDescBuff THTensor_(desc)(const THTensor *tensor) { - const int L = TH_DESC_BUFF_LEN; - THDescBuff buf; - char *str = buf.str; - int n = 0; -#define _stringify(x) #x - n += snprintf(str, L-n, "torch." _stringify(x) "Tensor of size "); -#undef _stringify - int i; - for(i = 0; i < tensor->nDimension; i++) { - if(n >= L) break; - n += snprintf(str+n, L-n, "%ld", tensor->size[i]); - if(i < tensor->nDimension-1) { - n += snprintf(str+n, L-n, "x"); - } - } - if(n >= L) { - snprintf(str+L-4, 4, "..."); - } - return buf; -} - -THDescBuff THTensor_(sizeDesc)(const THTensor *tensor) { - THLongStorage *size = THTensor_(newSizeOf)((THTensor*)tensor); - THDescBuff buf = THLongStorage_sizeDesc(size); - THLongStorage_free(size); - return buf; -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensor.h deleted file mode 100644 index 9fb246c85..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensor.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensor.h" -#else - -/* a la lua? dim, storageoffset, ... et les methodes ? 
*/ - -#define TH_TENSOR_REFCOUNTED 1 - -typedef struct THTensor -{ - long *size; - long *stride; - int nDimension; - - THStorage *storage; - ptrdiff_t storageOffset; - int refcount; - - char flag; - -} THTensor; - - -/**** access methods ****/ -TH_API THStorage* THTensor_(storage)(const THTensor *self); -TH_API ptrdiff_t THTensor_(storageOffset)(const THTensor *self); -TH_API int THTensor_(nDimension)(const THTensor *self); -TH_API long THTensor_(size)(const THTensor *self, int dim); -TH_API long THTensor_(stride)(const THTensor *self, int dim); -TH_API THLongStorage *THTensor_(newSizeOf)(THTensor *self); -TH_API THLongStorage *THTensor_(newStrideOf)(THTensor *self); -TH_API real *THTensor_(data)(const THTensor *self); - -TH_API void THTensor_(setFlag)(THTensor *self, const char flag); -TH_API void THTensor_(clearFlag)(THTensor *self, const char flag); - - -/**** creation methods ****/ -TH_API THTensor *THTensor_(new)(void); -TH_API THTensor *THTensor_(newWithTensor)(THTensor *tensor); -/* stride might be NULL */ -TH_API THTensor *THTensor_(newWithStorage)(THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_); -TH_API THTensor *THTensor_(newWithStorage1d)(THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_); -TH_API THTensor *THTensor_(newWithStorage2d)(THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_); -TH_API THTensor *THTensor_(newWithStorage3d)(THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_, - long size2_, long stride2_); -TH_API THTensor *THTensor_(newWithStorage4d)(THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_, - long size2_, long stride2_, - long size3_, long stride3_); - -/* stride might be NULL */ -TH_API THTensor *THTensor_(newWithSize)(THLongStorage *size_, THLongStorage *stride_); -TH_API THTensor 
*THTensor_(newWithSize1d)(long size0_); -TH_API THTensor *THTensor_(newWithSize2d)(long size0_, long size1_); -TH_API THTensor *THTensor_(newWithSize3d)(long size0_, long size1_, long size2_); -TH_API THTensor *THTensor_(newWithSize4d)(long size0_, long size1_, long size2_, long size3_); - -TH_API THTensor *THTensor_(newClone)(THTensor *self); -TH_API THTensor *THTensor_(newContiguous)(THTensor *tensor); -TH_API THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_); -TH_API THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_); -TH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_); -TH_API THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_); -TH_API THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size); -TH_API THTensor *THTensor_(newExpand)(THTensor *tensor, THLongStorage *size); - -TH_API void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *size); -TH_API void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count); - -TH_API void THTensor_(resize)(THTensor *tensor, THLongStorage *size, THLongStorage *stride); -TH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src); -TH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, long *size, long *stride); -TH_API void THTensor_(resize1d)(THTensor *tensor, long size0_); -TH_API void THTensor_(resize2d)(THTensor *tensor, long size0_, long size1_); -TH_API void THTensor_(resize3d)(THTensor *tensor, long size0_, long size1_, long size2_); -TH_API void THTensor_(resize4d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_); -TH_API void THTensor_(resize5d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_); - -TH_API void THTensor_(set)(THTensor *self, THTensor *src); -TH_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, 
THLongStorage *size_, THLongStorage *stride_); -TH_API void THTensor_(setStorageNd)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, int nDimension, long *size, long *stride); -TH_API void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_); -TH_API void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_); -TH_API void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_, - long size2_, long stride2_); -TH_API void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, - long size0_, long stride0_, - long size1_, long stride1_, - long size2_, long stride2_, - long size3_, long stride3_); - -TH_API void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension_, long firstIndex_, long size_); -TH_API void THTensor_(select)(THTensor *self, THTensor *src, int dimension_, long sliceIndex_); -TH_API void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1_, int dimension2_); -TH_API void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension_, long size_, long step_); - -TH_API void THTensor_(squeeze)(THTensor *self, THTensor *src); -TH_API void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension_); -TH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_); - -TH_API int THTensor_(isContiguous)(const THTensor *self); -TH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src); -TH_API int THTensor_(isSetTo)(const THTensor *self, const THTensor *src); -TH_API int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims); -TH_API ptrdiff_t THTensor_(nElement)(const THTensor *self); - -TH_API void THTensor_(retain)(THTensor *self); -TH_API void THTensor_(free)(THTensor *self); -TH_API 
void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst); - -/* Slow access methods [check everything] */ -TH_API void THTensor_(set1d)(THTensor *tensor, long x0, real value); -TH_API void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value); -TH_API void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value); -TH_API void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value); - -TH_API real THTensor_(get1d)(const THTensor *tensor, long x0); -TH_API real THTensor_(get2d)(const THTensor *tensor, long x0, long x1); -TH_API real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2); -TH_API real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3); - -/* Debug methods */ -TH_API THDescBuff THTensor_(desc)(const THTensor *tensor); -TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c deleted file mode 100644 index 684ff9db5..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.c +++ /dev/null @@ -1,1957 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorConv.c" -#else - -/* - 2D Input, 2D kernel : convolve given image with the given kernel. -*/ -void THTensor_(validXCorr2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc) -{ - long or = (ir - kr) / sr + 1; - long oc = (ic - kc) / sc + 1; - - long xx, yy, kx, ky; - - if ((sc != 1) || (oc < 4)) { - /* regular convolution */ - for(yy = 0; yy < or; yy++) { - for(xx = 0; xx < oc; xx++) { - /* Dot product in two dimensions... 
(between input image and the mask) */ - real *pi_ = t_ + yy*sr*ic + xx*sc; - real *pw_ = k_; - real sum = 0; - for(ky = 0; ky < kr; ky++) { - for(kx = 0; kx < kc; kx++) { - sum += pi_[kx]*pw_[kx]; - } - pi_ += ic; /* next input line */ - pw_ += kc; /* next mask line */ - } - /* Update output */ - *r_++ += alpha*sum; - } - } - - } else { - /* SSE-based convolution */ - for(yy = 0; yy < or; yy++) { - real *pi_ = t_ + yy*sr*ic; - real *pw_ = k_; - for (ky = 0; ky < kr; ky++) { - real *pis_ = pi_; - for (kx = 0; kx < kc; kx++) { - THVector_(cadd)(r_, r_, pis_, alpha*pw_[kx], oc); - pis_++; - } - pi_ += ic; /* next input line */ - pw_ += kc; /* next mask line */ - } - r_ += oc; - } - } -} - -/* - 2D Input, 2D kernel : convolve given image with the given kernel. -*/ -void THTensor_(validConv2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc) -{ - long or = (ir - kr) / sr + 1; - long oc = (ic - kc) / sc + 1; - - long xx, yy, kx, ky; - - if ((sc != 1) || (oc < 4)) { - /* regular convolution */ - for(yy = 0; yy < or; yy++) { - for(xx = 0; xx < oc; xx++) { - /* Dot product in two dimensions... (between input image and the mask) */ - real *pi_ = t_ + yy*sr*ic + xx*sc; - real *pw_ = k_ + kr*kc - 1; - real sum = 0; - for(ky = 0; ky < kr; ky++) { - for(kx = 0; kx < kc; kx++) { - sum += pi_[kx]*pw_[-kx]; - } - pi_ += ic; /* next input line */ - pw_ -= kc; /* next mask line */ - } - /* Update output */ - *r_++ += alpha*sum; - } - } - - } else { - /* SSE-based convolution */ - for(yy = 0; yy < or; yy++) { - real *pw_ = k_ + kr*kc - 1; - real *pi_ = t_ + yy*sr*ic; - for (ky = 0; ky < kr; ky++) { - real *pis_ = pi_; - for (kx = 0; kx < kc; kx++) { - THVector_(cadd)(r_, r_, pis_, alpha*pw_[-kx], oc); - pis_++; - } - pi_ += ic; /* next input line */ - pw_ -= kc; /* next mask line */ - } - r_ += oc; - } - } -} - -/* - 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. 
-*/ -void THTensor_(fullConv2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc) -{ - long oc = (ic - 1) * sc + kc; - - long xx, yy, kx, ky; - - if ((sc != 1) || (ic < 4)) { - /* regular convolution */ - for(yy = 0; yy < ir; yy++) { - for(xx = 0; xx < ic; xx++) { - /* Outer product in two dimensions... (between input image and the mask) */ - real *po_ = r_ + yy*sr*oc + xx*sc; - real *pw_ = k_; - for(ky = 0; ky < kr; ky++) - { - real z = *t_ * alpha; - for(kx = 0; kx < kc; kx++) { - po_[kx] += z * pw_[kx]; - } - po_ += oc; /* next input line */ - pw_ += kc; /* next mask line */ - } - t_++; - } - } - - } else { - /* SSE-based convolution */ - for(yy = 0; yy < ir; yy++) { - real *po_ = r_ + yy*sr*oc; - real *pw_ = k_; - for (ky = 0; ky < kr; ky++) { - real *pos_ = po_; - for (kx = 0; kx < kc; kx++) { - THVector_(cadd)(pos_, pos_, t_, alpha*pw_[kx], ic); - pos_++; - } - po_ += oc; /* next input line */ - pw_ += kc; /* next mask line */ - } - t_ += ic; - } - } -} - -/* - 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. -*/ -void THTensor_(fullXCorr2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc) -{ - long oc = (ic - 1) * sc + kc; - - long xx, yy, kx, ky; - - if ((sc != 1) || (ic < 4)) { - /* regular convolution */ - for(yy = 0; yy < ir; yy++) { - for(xx = 0; xx < ic; xx++) { - /* Outer product in two dimensions... 
(between input image and the mask) */ - real *po_ = r_ + yy*sr*oc + xx*sc; - real *pw_ = k_ + kr*kc -1; - long kx, ky; - for(ky = 0; ky < kr; ky++) - { - real z = *t_ * alpha; - for(kx = 0; kx < kc; kx++) { - po_[kx] += z * pw_[-kx]; - } - po_ += oc; /* next input line */ - pw_ -= kc; /* next mask line */ - } - t_++; - } - } - - } else { - /* SSE-based convolution */ - for(yy = 0; yy < ir; yy++) { - real *po_ = r_ + yy*sr*oc; - real *pw_ = k_ + kr*kc -1; - for (ky = 0; ky < kr; ky++) { - real *pos_ = po_; - for (kx = 0; kx < kc; kx++) { - THVector_(cadd)(pos_, pos_, t_, pw_[-kx]*alpha, ic); - pos_++; - } - po_ += oc; /* next input line */ - pw_ -= kc; /* next mask line */ - } - t_ += ic; - } - } -} - -/* - 2D Input, 2D kernel : convolve given image with the given kernel, valid convolution. - for sr,sc=1 this is equivalent to validXCorr2Dptr, but otherwise it is useful for - calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 -*/ -void THTensor_(validXCorr2DRevptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc) -{ - long or = ir - (kr - 1) * sr; - long oc = ic - (kc - 1) * sc; - - long xx, yy, kx, ky; - - if ((sc != 1) || (kc < 4)) { - /* regular convolution */ - for(yy = 0; yy < kr; yy++) { - for(xx = 0; xx < kc; xx++) { - real *po_ = r_; - real *pi_ = t_ + yy*sr*ic + xx*sc; - real z = *k_++ * alpha; - - for(ky = 0; ky < or; ky++) { - for(kx = 0; kx < oc; kx++) - po_[kx] += z * pi_[kx]; - pi_ += ic; - po_ += oc; - } - } - } - - } else { - /* SSE-based convolution */ - for(yy = 0; yy < kr; yy++) { - for(xx = 0; xx < kc; xx++) { - real *po_ = r_; - real *pi_ = t_ + yy*sr*ic + xx*sc; - real z = *k_++ * alpha; - - for(ky = 0; ky < or; ky++) { - THVector_(cadd)(po_, po_, pi_, z, oc); - pi_ += ic; - po_ += oc; - } - } - } - } -} -/* - 3D Input, 3D kernel : convolve given volume with the given kernel. 
-*/ -void THTensor_(validXCorr3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc) -{ - long ot = (it - kt) / st + 1; - long or = (ir - kr) / sr + 1; - long oc = (ic - kc) / sc + 1; - - long zz, xx, yy; - - for (zz = 0; zz < ot; zz++) - { - for(yy = 0; yy < or; yy++) - { - for(xx = 0; xx < oc; xx++) - { - /* Dot product in two dimensions... (between input image and the mask) */ - real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; - real *pw_ = k_; - real sum = 0; - long kz, kx, ky; - for(kz = 0; kz < kt; kz++) - { - for(ky = 0; ky < kr; ky++) - { - for(kx = 0; kx < kc; kx++) { - sum += pi_[kx]*pw_[kx]; - } - pi_ += ic; /* next input line */ - pw_ += kc; /* next mask line */ - } - pi_ += (ir-kr)*ic; /* next input slice */ - } - /* Update output */ - *r_++ += sum*alpha; - } - } - } -} - -/* - 3D Input, 3D kernel : convolve given volume with the given kernel. -*/ -void THTensor_(validConv3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc) -{ - long ot = (it - kt) / st + 1; - long or = (ir - kr) / sr + 1; - long oc = (ic - kc) / sc + 1; - - long zz, xx, yy; - - for(zz = 0; zz < ot; zz++) - { - for(yy = 0; yy < or; yy++) - { - for(xx = 0; xx < oc; xx++) - { - /* Dot product in two dimensions... (between input image and the mask) */ - real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; - real *pw_ = k_ + kt*kr*kc - 1; - real sum = 0; - long kz, kx, ky; - for(kz = 0; kz < kt; kz++) - { - for(ky = 0; ky < kr; ky++) - { - for(kx = 0; kx < kc; kx++) { - sum += pi_[kx]*pw_[-kx]; - } - pi_ += ic; /* next input line */ - pw_ -= kc; /* next mask line */ - } - pi_ += (ir-kr)*ic; /* next input slice */ - } - /* Update output */ - *r_++ += alpha*sum; - } - } - } -} - - -/* - 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. 
-*/ -void THTensor_(fullConv3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc) -{ - long or = (ir - 1) * sr + kr; - long oc = (ic - 1) * sc + kc; - - long zz, xx, yy; - - for(zz = 0; zz < it; zz++) - { - for(yy = 0; yy < ir; yy++) - { - for(xx = 0; xx < ic; xx++) - { - /* Outer product in two dimensions... (between input image and the mask) */ - real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; - real *pw_ = k_; - long kz, kx, ky; - /* printf("Output Plane : %ld,%ld,%ld, input val=%g\n",zz,yy,xx,*t_); */ - for(kz = 0; kz < kt; kz++) - { - for(ky = 0; ky < kr; ky++) - { - real z = *t_ * alpha; - for(kx = 0; kx < kc; kx++) { - /* printf("o=%g,k=%g," , po_[kx],pw_[kx]); */ - po_[kx] += z * pw_[kx]; - /* printf("o=%g " , po_[kx]); */ - } - /* printf("\n"); */ - po_ += oc; /* next input line */ - pw_ += kc; /* next mask line */ - } - po_ += (or-kr)*oc; /* next output slice */ - /* printf("\n"); */ - } - t_++; - } - } - } -} - -/* - 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. -*/ -void THTensor_(fullXCorr3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc) -{ - long or = (ir - 1) * sr + kr; - long oc = (ic - 1) * sc + kc; - - long zz, xx, yy; - - for(zz = 0; zz < it; zz++) - { - for(yy = 0; yy < ir; yy++) - { - for(xx = 0; xx < ic; xx++) - { - /* Outer product in two dimensions... 
(between input image and the mask) */ - real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; - real *pw_ = k_ + kt*kr*kc -1; - long kz, kx, ky; - for(kz = 0; kz < kt; kz++) - { - for(ky = 0; ky < kr; ky++) - { - real z = *t_ * alpha; - for(kx = 0; kx < kc; kx++) { - po_[kx] += z * pw_[-kx]; - } - po_ += oc; /* next input line */ - pw_ -= kc; /* next mask line */ - } - po_ += (or-kr)*oc; /* next output slice */ - } - t_++; - } - } - } -} - -/* - 3D Input, 3D kernel : convolve given image with the given kernel, valid convolution. - for sr,sc=1 this is equivalent to validXCorr3Dptr, but otherwise it is useful for - calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 -*/ -void THTensor_(validXCorr3DRevptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc) -{ - long ot = it - (kt - 1) * st; - long or = ir - (kr - 1) * sr; - long oc = ic - (kc - 1) * sc; - - long zz, xx, yy; - for(zz = 0; zz < kt; zz++) - { - for(yy = 0; yy < kr; yy++) - { - for(xx = 0; xx < kc; xx++) - { - real *po_ = r_; - real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; - real z = *k_++ * alpha; - long kz, kx, ky; - for(kz = 0; kz < ot; kz++) - { - for(ky = 0; ky < or; ky++) - { - for(kx = 0; kx < oc; kx++) - po_[kx] += z * pi_[kx]; - pi_ += ic; - po_ += oc; - } - pi_ += (ir-or)*ic; /* next input slice */ - } - } - } - } -} - -void THTensor_(conv2d)(real* output_data, - real alpha, - real* ptr_input, long nInputRows, long nInputCols, - real* ptr_weight, long nKernelRows, long nKernelCols, - long srow, long scol, - const char *vf, const char *xc) -{ - THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'"); - if (*vf == 'F') - if (*xc == 'X') - THTensor_(fullXCorr2Dptr)(output_data, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - 
THTensor_(fullConv2Dptr)(output_data, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - if (*xc == 'X') - THTensor_(validXCorr2Dptr)(output_data, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - THTensor_(validConv2Dptr)(output_data, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); -} - -void THTensor_(conv3d)(real* output_data, - real alpha, - real* ptr_input, long nInputDepth, long nInputRows, long nInputCols, - real* ptr_weight, long nKernelDepth, long nKernelRows, long nKernelCols, - long sdepth, long srow, long scol, - const char *vf, const char *xc) -{ - THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'"); - if (*vf == 'F') - if (*xc == 'X') - THTensor_(fullXCorr3Dptr)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol); - else - THTensor_(fullConv3Dptr)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol); - else - if (*xc == 'X') - THTensor_(validXCorr3Dptr)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol); - else - THTensor_(validConv3Dptr)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol); -} - -long THTensor_(convsize)(long x, long k, long s, const char* vf) -{ - THArgCheck(*vf == 'V' || *vf == 'F', 1, "type of convolution can be 'V' or 'F'"); - if (*vf == 'V') - return (x-k)/s + 1; - else - return (x-1)*s + k; -} - - -/* - 3D input, 3D kernel, 4D output - like rank1 update - A <- xx' + beta*A 
- for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for - calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 -*/ -void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) -{ - long nInputPlane, nInputRows, nInputCols; - long nKernelPlane, nKernelRows, nKernelCols; - long nOutputPlane, nOutputRows, nOutputCols; - long istride0, kstride0; - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - nInputPlane = input->size[0]; - istride0 = input->stride[0]; - nInputRows = input->size[1]; - nInputCols = input->size[2]; - - kstride0 = kernel->stride[0]; - nKernelPlane = kernel->size[0]; - nKernelRows = kernel->size[1]; - nKernelCols = kernel->size[2]; - nOutputPlane = nInputPlane * kernel->size[0]; - - THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "covn2DRevger : Input image is smaller than kernel"); - - nOutputRows = nInputRows - (nKernelRows - 1) * srow; - nOutputCols = nInputCols - (nKernelCols - 1) * scol; - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - /*THTensor_(zero)(r_);*/ - -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]*r_->size[1]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < 
nOutputRows*nOutputCols; l++) - ptr_output[l] = 0.0; - } - } - else if (beta != 1) - { - /*THTensor_(mul)(r_, beta);*/ -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]*r_->size[1]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] *= beta; - } - } - -#pragma omp parallel for private(k) - for(k = 0; k < nKernelPlane; k++) - { - long i; - /* get kernel */ - real *ptr_weight = weight_data+k*kstride0; - - for(i = 0; i < nInputPlane; i++) - { - /* get output */ - real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; - /* get input */ - real *ptr_input = input_data+i*istride0; - - /* do image, kernel convolution */ - THTensor_(validXCorr2DRevptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - /* Next output plane */ - /* output_data += nOutputCols*nOutputRows; */ - } - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - - -/* - 3D input, 3D kernel, 4D output - like rank1 update - A <- xx' + beta*A - for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for - calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 -*/ -void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) -{ - long nbatch, nInputPlane, nInputRows, nInputCols; - long nKernelPlane, nKernelRows, nKernelCols; - long nOutputRows, nOutputCols; - long istride0, kstride0, istride1, kstride1; - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive 
integer"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - istride0 = input->stride[0]; - istride1 = input->stride[1]; - nbatch = input->size[0]; - nInputPlane = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - kstride0 = kernel->stride[0]; - kstride1 = kernel->stride[1]; - nKernelPlane = kernel->size[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - - THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv2DRevger : Input image is smaller than kernel"); - THArgCheck(kernel->size[0] == input->size[0] , 2, "conv2DRevger : Input batch and kernel batch is not same size"); - - nOutputRows = nInputRows - (nKernelRows - 1) * srow; - nOutputCols = nInputCols - (nKernelCols - 1) * scol; - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - /*THTensor_(zero)(r_);*/ - -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]*r_->size[1]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] = 0.0; - } - } - else if (beta != 1) - { - /*THTensor_(mul)(r_, beta);*/ -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]*r_->size[1]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] *= beta; - } - } - -#pragma omp parallel for private(k) - for(k = 0; k < nKernelPlane; k++) - { - long i; - for(i = 0; i < nInputPlane; i++) - { - long p; - for(p = 0; p < nbatch; p++) - { - /* get kernel */ - real *ptr_weight = weight_data + p*kstride0 + k*kstride1; - /* get output */ - real *ptr_output = output_data + 
k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; - /* get input */ - real *ptr_input = input_data + p*istride0 + i*istride1; - - /* do image, kernel convolution */ - THTensor_(validXCorr2DRevptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - /* Next output plane */ - /* output_data += nOutputCols*nOutputRows; */ - } - } - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - - -/* - 3D input, 3D kernel, 4D output - like rank1 update - A <- xx' + beta*A -*/ -void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputRows, nInputCols; - long nKernelPlane, nKernelRows, nKernelCols; - long nOutputPlane, nOutputRows, nOutputCols; - long istride0, kstride0; - - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - nInputPlane = input->size[0]; - istride0 = input->stride[0]; - nInputRows = input->size[1]; - nInputCols = input->size[2]; - - kstride0 = kernel->stride[0]; - nKernelPlane = kernel->size[0]; - nKernelRows = kernel->size[1]; - nKernelCols = kernel->size[2]; - nOutputPlane = nInputPlane * kernel->size[0]; - - THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dger : Input image is smaller than kernel"); - - if (*vf == 'F') { - 
nOutputRows = (nInputRows - 1) * srow + nKernelRows; - nOutputCols = (nInputCols - 1) * scol + nKernelCols; - } else { /* valid */ - nOutputRows = (nInputRows - nKernelRows) / srow + 1; - nOutputCols = (nInputCols - nKernelCols) / scol + 1; - } - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - /*THTensor_(zero)(r_);*/ -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]*r_->size[1]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] = 0.0; - } - } - else if (beta != 1) - { - /*THTensor_(mul)(r_, beta);*/ -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]*r_->size[1]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] *= beta; - } - } - -#pragma omp parallel for private(k) - for(k = 0; k < nKernelPlane; k++) - { - long i; - /* get kernel */ - real *ptr_weight = weight_data+k*kstride0; - - for(i = 0; i < nInputPlane; i++) - { - /* get output */ - real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; - /* get input */ - real *ptr_input = input_data+i*istride0; - - /* do image, kernel convolution */ - if (*vf == 'F') - if (*xc == 'X') - THTensor_(fullXCorr2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - THTensor_(fullConv2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - if (*xc == 'X') - THTensor_(validXCorr2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, 
nKernelCols, - srow, scol); - else - THTensor_(validConv2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - /* Next output plane */ - /* output_data += nOutputCols*nOutputRows; */ - } - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - - -/* - 3D input, 4D kernel, 3D output - matrix vector product like - y <- Ax + beta*y -*/ -void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputRows, nInputCols; - long nKernelRows, nKernelCols; - long nOutputPlane, nOutputRows, nOutputCols; - long istride0, kstride0, kstride1; - THTensor *input; - THTensor* kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { - kernel = THTensor_(newContiguous)(k_); - } else { - THTensor_(retain)(k_); - kernel = k_; - } - - nInputPlane = input->size[0]; - istride0 = input->stride[0]; - nInputRows = input->size[1]; - nInputCols = input->size[2]; - - kstride0 = kernel->stride[0]; - kstride1 = kernel->stride[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - nOutputPlane = kernel->size[0]; - THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); - - THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than 
kernel"); - - if (*vf == 'F') { - nOutputRows = (nInputRows - 1) * srow + nKernelRows; - nOutputCols = (nInputCols - 1) * scol + nKernelCols; - } else { /* valid */ - nOutputRows = (nInputRows - nKernelRows) / srow + 1; - nOutputCols = (nInputCols - nKernelCols) / scol + 1; - } - - nelem = THTensor_(nElement)(r_); - THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - /*THTensor_(zero)(r_);*/ -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] = 0.0; - } - } - else if (beta != 1) - { - /*THTensor_(mul)(r_, beta);*/ -#pragma omp parallel for private(k) - for (k = 0; k < r_->size[0]; k++) - { - real* ptr_output = output_data + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] *= beta; - } - } - -#pragma omp parallel for private(k) - for(k = 0; k < nOutputPlane; k++) - { - long i; - /* get output */ - real *ptr_output = output_data + k*nOutputCols*nOutputRows; - for(i = 0; i < nInputPlane; i++) - { - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0 + i*kstride1; - /* get input */ - real *ptr_input = input_data + i*istride0; - - /* do image, kernel convolution */ - if (*vf == 'F') - if (*xc == 'X') - THTensor_(fullXCorr2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - THTensor_(fullConv2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - if (*xc == 'X') - THTensor_(validXCorr2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); 
- else - THTensor_(validConv2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - } - /* Next output plane */ - /* output_data += nOutputCols*nOutputRows;*/ - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - - -/* - 3D input, 4D kernel, 3D output - matrix vector product like - y <- Ax + beta*y -*/ -void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputRows, nInputCols; - long nKernelRows, nKernelCols; - long nOutputPlane, nOutputRows, nOutputCols; - long kstride0, kstride1; - THTensor *input; - THTensor* kernel; - long nbatch; - ptrdiff_t nelem; - real *input_data; - real *weight_data; - real *output_data; - long p; - - THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { - kernel = THTensor_(newContiguous)(k_); - } else { - THTensor_(retain)(k_); - kernel = k_; - } - - nbatch = input->size[0]; - nInputPlane = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - kstride0 = kernel->stride[0]; - kstride1 = kernel->stride[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - nOutputPlane = kernel->size[0]; - THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); - - THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); - - if (*vf == 'F') { - 
nOutputRows = (nInputRows - 1) * srow + nKernelRows; - nOutputCols = (nInputCols - 1) * scol + nKernelCols; - } else { /* valid */ - nOutputRows = (nInputRows - nKernelRows) / srow + 1; - nOutputCols = (nInputCols - nKernelCols) / scol + 1; - } - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - /*THTensor_(zero)(r_);*/ -#pragma omp parallel for private(p) - for (p=0; p < r_->size[0]; p++) - { - long k; - for (k = 0; k < r_->size[1]; k++) - { - real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] = 0.0; - } - } - } - else if (beta != 1) - { - /*THTensor_(mul)(r_, beta);*/ -#pragma omp parallel for private(p) - for(p=0; p < r_->size[0]; p++) - { - long k; - for (k = 0; k < r_->size[1]; k++) - { - real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; - long l; - for (l = 0; l < nOutputRows*nOutputCols; l++) - ptr_output[l] *= beta; - } - } - } - -#pragma omp parallel for private(p) - for(p=0; p < nbatch; p++) - { - long k; - for(k = 0; k < nOutputPlane; k++) - { - long i; - /* get output */ - real *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows; - for(i = 0; i < nInputPlane; i++) - { - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0 + i*kstride1; - /* get input */ - real *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols; - - /* do image, kernel convolution */ - if (*vf == 'F') - if (*xc == 'X') - THTensor_(fullXCorr2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - 
THTensor_(fullConv2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - if (*xc == 'X') - THTensor_(validXCorr2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - else - THTensor_(validConv2Dptr)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol); - } - /* Next output plane */ - /* output_data += nOutputCols*nOutputRows;*/ - } - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - - -/* - 2D input, 2D kernel, 2D output - scalar multiplication like - y <- x*y + beta*y -*/ -void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) -{ - THTensor *input; - THTensor* kernel; - long nInputRows; - long nInputCols; - long nKernelRows; - long nKernelCols; - long nOutputRows, nOutputCols; - real *ptr_input; - real *ptr_weight; - real *output_data; - ptrdiff_t nelem; - - THArgCheck(t_->nDimension == 2 , 3, "input: 2D Tensor expected"); - THArgCheck(k_->nDimension == 2 , 4, "kernel: 2D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - nInputRows = input->size[0]; - nInputCols = input->size[1]; - nKernelRows = kernel->size[0]; - nKernelCols = kernel->size[1]; - - THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmul : Input image is smaller than kernel"); - - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize2d)(r_, nOutputRows, nOutputCols); - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - 
THTensor_(zero)(r_); - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - ptr_input = THTensor_(data)(input); - ptr_weight = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - - /* do image, kernel convolution */ - THTensor_(conv2d)(output_data, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol, vf, xc); - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 3D input, 3D kernel, 3D output - component wise multiplication like - y <- y.*x + beta*y -*/ -void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputRows, nInputCols; - long nKernelRows, nKernelCols; - long nOutputPlane, nOutputRows, nOutputCols; - long istride0, kstride0; - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - istride0 = input->stride[0]; - nInputPlane = input->size[0]; - nInputRows = input->size[1]; - nInputCols = input->size[2]; - - kstride0 = kernel->stride[0]; - nOutputPlane = kernel->size[0]; - nKernelRows = kernel->size[1]; - nKernelCols = kernel->size[2]; - - THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); - THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dcmul : Input image is smaller than kernel"); - - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - 
THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - for(k = 0; k < nOutputPlane; k++) - { - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0; - /* get input */ - real *ptr_input = input_data + k*istride0; - - /* do image, kernel convolution */ - THTensor_(conv2d)(output_data, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol, vf, xc); - /* Next output plane */ - output_data += nOutputCols*nOutputRows; - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 3D input, 3D kernel, 3D output - component wise multiplication like with a permutation map - y <- y.*x + beta*y -*/ -void THTensor_(conv2Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputRows, nInputCols; - long nKernelRows, nKernelCols; - long nOutputPlane, nOutputRows, nOutputCols; - long istride0, kstride0; - THTensor *input; - THTensor* kernel; - real *input_data; - real *weight_data; - real *output_data; - long nmaps; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); - THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); - THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - istride0 = input->stride[0]; - nInputPlane = input->size[0]; - nInputRows = input->size[1]; - nInputCols = input->size[2]; - - kstride0 = kernel->stride[0]; - nOutputPlane 
= kernel->size[0]; - nKernelRows = kernel->size[1]; - nKernelCols = kernel->size[2]; - - THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); - THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) - || *vf == 'F', 2, "conv2Dmap : Input image is smaller than kernel"); - - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - nmaps = map->size[0]; - - for(k = 0; k < nmaps; k++) - { - /* get indices */ - long from = (long)THTensor_(get2d)(map,k,0)-1; - long to = (long)THTensor_(get2d)(map,k,1)-1; - - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0; - /* get input */ - real *ptr_input = input_data + from*istride0; - /* get output */ - real *ptr_output = output_data + to*nOutputRows*nOutputCols; - - /* do image, kernel convolution */ - THTensor_(conv2d)(ptr_output, - alpha, - ptr_input, nInputRows, nInputCols, - ptr_weight, nKernelRows, nKernelCols, - srow, scol, vf, xc); - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 4D input, 4D kernel, 5D output - like rank1 update - A <- xx' + beta*A - for sr,sc=1 this is equivalent to xcorr2Dger, but otherwise it is useful for - calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 -*/ -void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, - long sdepth, long srow, long scol) -{ - long nInputPlane, nInputDepth, nInputRows, nInputCols; - long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols; - long nOutputPlane, nOutputDepth, 
nOutputRows, nOutputCols; - long istride0, kstride0; - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k, i; - - THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); - THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); - THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - nInputPlane = input->size[0]; - istride0 = input->stride[0]; - nInputDepth = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - kstride0 = kernel->stride[0]; - nKernelPlane = kernel->size[0]; - nKernelDepth= kernel->size[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - nOutputPlane = nInputPlane * kernel->size[0]; - - THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv3DRevger : Input image is smaller than kernel"); - - nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth; - nOutputRows = nInputRows - (nKernelRows - 1) * srow; - nOutputCols = nInputCols - (nKernelCols - 1) * scol; - - nelem = THTensor_(nElement)(r_); - THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - for(k = 0; k < nKernelPlane; k++) - { - /* get kernel */ - real *ptr_weight = weight_data+k*kstride0; - - for(i = 0; i < nInputPlane; i++) - { - /* get input */ - real *ptr_input = input_data+i*istride0; - - /* do image, kernel convolution */ - 
THTensor_(validXCorr3DRevptr)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol); - /* Next output plane */ - output_data += nOutputDepth*nOutputCols*nOutputRows; - } - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - - -/* - 4D input, 4D kernel, 5D output - like rank1 update - A <- xx' + beta*A -*/ -void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, - long sdepth, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputDepth, nInputRows, nInputCols; - long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols; - long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; - long istride0, kstride0; - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k, i; - - THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); - THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); - THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - nInputPlane = input->size[0]; - istride0 = input->stride[0]; - nInputDepth = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - kstride0 = kernel->stride[0]; - nKernelPlane = kernel->size[0]; - nKernelDepth = kernel->size[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - nOutputPlane = nInputPlane * kernel->size[0]; - - THArgCheck((nInputDepth >= nKernelDepth - && nInputRows >= nKernelRows - && nInputCols >= nKernelCols) - || *vf == 'F', 
2, "conv3Dger : Input image is smaller than kernel"); - - nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - for(k = 0; k < nKernelPlane; k++) - { - /* get kernel */ - real *ptr_weight = weight_data+k*kstride0; - - for(i = 0; i < nInputPlane; i++) - { - /* get input */ - real *ptr_input = input_data+i*istride0; - - /* do image, kernel convolution */ - THTensor_(conv3d)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol, vf, xc); - - /* Next output plane */ - output_data += nOutputDepth*nOutputCols*nOutputRows; - } - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 4D input, 5D kernel, 4D output - matrix vector product like - y <- Ax + beta*y -*/ -void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, - long sdepth, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputDepth, nInputRows, nInputCols; - long nKernelDepth, nKernelRows, nKernelCols; - long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; - long istride0, kstride0, kstride1; - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k, i; - - THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); - THArgCheck(k_->nDimension == 5 , 4, "kernel: 5D Tensor expected"); - THArgCheck(sdepth 
>= 1, 5, "Stride should be a positive integer"); - THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - if (!(k_->stride[4] == 1) || !(k_->stride[3] == k_->size[4])) { - kernel = THTensor_(newContiguous)(k_); - } else { - THTensor_(retain)(k_); - kernel = k_; - } - - nInputPlane = input->size[0]; - istride0 = input->stride[0]; - nInputDepth = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - kstride0 = kernel->stride[0]; - kstride1 = kernel->stride[1]; - nKernelDepth = kernel->size[2]; - nKernelRows = kernel->size[3]; - nKernelCols = kernel->size[4]; - nOutputPlane = kernel->size[0]; - THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); - - THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmv : Input image is smaller than kernel"); - - nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - for(k = 0; k < nOutputPlane; k++) - { - for(i = 0; i < nInputPlane; i++) - { - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0 + i*kstride1; - /* get input */ - real *ptr_input = input_data + i*istride0; 
- - /* do image, kernel convolution */ - THTensor_(conv3d)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol, vf, xc); - } - /* Next output plane */ - output_data += nOutputDepth*nOutputCols*nOutputRows; - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 3D input, 3D kernel, 3D output - scalar multiplication like - y <- x*y + beta*y -*/ -void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, - long sdepth, long srow, long scol, const char *vf, const char *xc) -{ - THTensor *input; - THTensor* kernel; - long nInputDepth; - long nInputRows; - long nInputCols; - long nKernelDepth; - long nKernelRows; - long nKernelCols; - long nOutputDepth, nOutputRows, nOutputCols; - real *ptr_input; - real *ptr_weight; - real *output_data; - ptrdiff_t nelem; - - THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); - THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); - THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - nInputDepth = input->size[0]; - nInputRows = input->size[1]; - nInputCols = input->size[2]; - nKernelDepth = kernel->size[0]; - nKernelRows = kernel->size[1]; - nKernelCols = kernel->size[2]; - - THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmul : Input image is smaller than kernel"); - - nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, 
srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols); - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - THTensor_(zero)(r_); - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - ptr_input = THTensor_(data)(input); - ptr_weight = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - - /* do image, kernel convolution */ - THTensor_(conv3d)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol, vf, xc); - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 4D input, 4D kernel, 4D output - component wise multiplication like - y <- y.*x + beta*y -*/ -void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, - long sdepth, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputDepth, nInputRows, nInputCols; - long nKernelDepth, nKernelRows, nKernelCols; - long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; - long istride0, kstride0; - - THTensor *input; - THTensor *kernel; - real *input_data; - real *weight_data; - real *output_data; - ptrdiff_t nelem; - long k; - - THArgCheck(t_->nDimension == 4 , 3, "input: 3D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 3D Tensor expected"); - THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - istride0 = input->stride[0]; - nInputPlane = input->size[0]; - nInputDepth = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - 
kstride0 = kernel->stride[0]; - nOutputPlane = kernel->size[0]; - nKernelDepth = kernel->size[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - - THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); - THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dcmul : Input image is smaller than kernel"); - - nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = THTensor_(data)(r_); - - for(k = 0; k < nOutputPlane; k++) - { - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0; - /* get input */ - real *ptr_input = input_data + k*istride0; - - /* do image, kernel convolution */ - THTensor_(conv3d)(output_data, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol, vf, xc); - - /* Next output plane */ - output_data += nOutputDepth*nOutputCols*nOutputRows; - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} - -/* - 4D input, 4D kernel, 4D output - component wise multiplication like with a permutation map - y <- y.*x + beta*y -*/ -void THTensor_(conv3Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, - long sdepth, long srow, long scol, const char *vf, const char *xc) -{ - long nInputPlane, nInputDepth, nInputRows, nInputCols; - long nKernelDepth, nKernelRows, nKernelCols; - long nOutputPlane, 
nOutputDepth, nOutputRows, nOutputCols; - long istride0, kstride0; - - THTensor *input; - THTensor *kernel; - ptrdiff_t nelem; - real *input_data; - real *weight_data; - real *output_data; - long nmaps; - long k; - - THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); - THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); - THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); - THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); - THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); - THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); - THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); - - input = THTensor_(newContiguous)(t_); - kernel = THTensor_(newContiguous)(k_); - - istride0 = input->stride[0]; - nInputPlane = input->size[0]; - nInputDepth = input->size[1]; - nInputRows = input->size[2]; - nInputCols = input->size[3]; - - kstride0 = kernel->stride[0]; - nOutputPlane = kernel->size[0]; - nKernelDepth = kernel->size[1]; - nKernelRows = kernel->size[2]; - nKernelCols = kernel->size[3]; - - THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); - THArgCheck((nInputDepth >= nKernelDepth - && nInputRows >= nKernelRows - && nInputCols >= nKernelCols) || *vf == 'F', - 2, "conv3Dmap : Input image is smaller than kernel"); - - nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); - nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); - nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); - - nelem = THTensor_(nElement)(r_); - THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); - - if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) - { - THTensor_(zero)(r_); - } - else if (beta != 1) - THTensor_(mul)(r_, r_, beta); - - input_data = THTensor_(data)(input); - weight_data = THTensor_(data)(kernel); - output_data = 
THTensor_(data)(r_); - - nmaps = map->size[0]; - - for(k = 0; k < nmaps; k++) - { - /* get indices */ - long from = (long)THTensor_(get2d)(map,k,0)-1; - long to = (long)THTensor_(get2d)(map,k,1)-1; - - /* get kernel */ - real *ptr_weight = weight_data + k*kstride0; - /* get input */ - real *ptr_input = input_data + from*istride0; - /* get output */ - real *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols; - - /* do image, kernel convolution */ - THTensor_(conv3d)(ptr_output, - alpha, - ptr_input, nInputDepth, nInputRows, nInputCols, - ptr_weight, nKernelDepth, nKernelRows, nKernelCols, - sdepth, srow, scol, vf, xc); - } - THTensor_(free)(input); - THTensor_(free)(kernel); -} -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h deleted file mode 100644 index 79866f390..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorConv.h +++ /dev/null @@ -1,79 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorConv.h" -#else - -TH_API void THTensor_(validXCorr2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc); - -TH_API void THTensor_(validConv2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc); - -TH_API void THTensor_(fullXCorr2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc); - -TH_API void THTensor_(fullConv2Dptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc); - -TH_API void THTensor_(validXCorr2DRevptr)(real *r_, - real alpha, - real *t_, long ir, long ic, - real *k_, long kr, long kc, - long sr, long sc); - -TH_API void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol); -TH_API void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, 
THTensor *t_, THTensor *k_, long srow, long scol); -TH_API void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); -TH_API void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); -TH_API void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); -TH_API void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); -TH_API void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc); - -TH_API void THTensor_(validXCorr3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc); - -TH_API void THTensor_(validConv3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc); - -TH_API void THTensor_(fullXCorr3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc); - -TH_API void THTensor_(fullConv3Dptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc); - -TH_API void THTensor_(validXCorr3DRevptr)(real *r_, - real alpha, - real *t_, long it, long ir, long ic, - real *k_, long kt, long kr, long kc, - long st, long sr, long sc); - -TH_API void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol); -TH_API void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); 
-TH_API void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); -TH_API void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); -TH_API void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long sdepth, long srow, long scol, const char *vf, const char *xc); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c deleted file mode 100644 index d9cd1c0d5..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.c +++ /dev/null @@ -1,136 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorCopy.c" -#else - -int THTensor_(copyTransposeValid)(THTensor *tensor, THTensor *src) { - const int MIN_SZ = 60 * 60; - return THTensor_(isContiguous)(tensor) && - THTensor_(nDimension)(src) == 2 && - THTensor_(stride)(src, 0) == 1 && - THTensor_(stride)(src, 1) == THTensor_(size)(src, 0) && - THTensor_(nElement)(tensor) >= MIN_SZ; -} - -// special case copy where tensor is contiguous and src is a transposed matrix -// This can be generalized to most copies, but it's tricker -void THTensor_(copyTranspose)(THTensor *tensor, THTensor *src) { - #define MIN(x, y) (((x) < (y)) ? (x) : (y)) - #define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) - -#ifdef TH_REAL_IS_BYTE - const int BLOCK_SZ = 120; -#else - const int BLOCK_SZ = 60; -#endif - - THTensor *buf = THTensor_(newWithSize2d)(BLOCK_SZ, BLOCK_SZ); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(tensor); - real *bp = THTensor_(data)(buf); - - long NR = THTensor_(size)(src, 0); - long NC = THTensor_(size)(src, 1); - for (long R = 0; R < NR; R += BLOCK_SZ) { - for (long C = 0; C < NC; C += BLOCK_SZ) { - real *spo = sp + R + C * NR; - real *rpo = rp + C + R * NC; - - int nr = MIN(NR - R, BLOCK_SZ); - int nc = MIN(NC - C, BLOCK_SZ); - - // 1. copy columns from src to buf - for (int c = 0; c < nc; c++) { - memcpy(bp + c * BLOCK_SZ, spo + c * NR, nr * sizeof(real)); - } - - // 2. transpose buf in place - int rc_max = MAX(nr, nc); - int rc_min = MIN(nr, nc); - for (int r = 0; r < rc_max; r++) { - int end = MIN(r, rc_min); - for (int c = 0; c < end; c++) { - real tmp = bp[r + BLOCK_SZ * c]; - bp[r + BLOCK_SZ * c] = bp[r * BLOCK_SZ + c]; - bp[r * BLOCK_SZ + c] = tmp; - } - } - - // 3. 
copy rows from buf to dst - for (int r = 0; r < nr; r++) { - memcpy(rpo + r * NC, bp + r * BLOCK_SZ, nc * sizeof(real)); - } - } - } - THTensor_(free)(buf); - #undef MIN - #undef MAX -} - -void THTensor_(copy)(THTensor *tensor, THTensor *src) -{ - if (tensor == src) return; - if (THTensor_(isContiguous)(tensor) && THTensor_(isContiguous)(src) && THTensor_(nElement)(tensor) == THTensor_(nElement)(src)) { - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(tensor); - ptrdiff_t sz = THTensor_(nElement)(tensor); -#ifndef TH_REAL_IS_HALF - THVector_(copy)(rp, sp, sz); -#else - memcpy(rp, sp, sz * sizeof(real)); -#endif -#ifndef TH_REAL_IS_HALF - } else if (THTensor_(copyTransposeValid)(tensor, src)) { - THTensor_(copyTranspose)(tensor, src); -#endif - } else { - TH_TENSOR_APPLY2(real, tensor, real, src, *tensor_data = *src_data;) - } -} - -#define IMPLEMENT_THTensor_COPY(TYPENAMESRC, TYPE_SRC) \ -void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ -{ \ - TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)(*src_data);) \ -} - -#define IMPLEMENT_THTensor_COPY_TO_HALF(TYPENAMESRC, TYPE_SRC) \ -void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ -{ \ - TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = TH_float2half((float)*src_data);) \ -} - -#define IMPLEMENT_THTensor_COPY_FROM_HALF(TYPENAMESRC, TYPE_SRC) \ -void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ -{ \ - TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)TH_half2float(*src_data);) \ -} - -#define IMPLEMENT_THTensor_COPY_TO_FROM_HALF(TYPENAMESRC, TYPE_SRC) \ -void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \ -{ \ - TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = *src_data;) \ -} - -#ifndef TH_REAL_IS_HALF -IMPLEMENT_THTensor_COPY(Byte, unsigned char) -IMPLEMENT_THTensor_COPY(Char, char) -IMPLEMENT_THTensor_COPY(Short, 
short) -IMPLEMENT_THTensor_COPY(Int, int) -IMPLEMENT_THTensor_COPY(Long, long) -IMPLEMENT_THTensor_COPY(Float, float) -IMPLEMENT_THTensor_COPY(Double, double) -IMPLEMENT_THTensor_COPY_FROM_HALF(Half, THHalf) -#else -/* only allow pass-through for Half */ -IMPLEMENT_THTensor_COPY_TO_FROM_HALF(Half, THHalf) -IMPLEMENT_THTensor_COPY_TO_HALF(Byte, unsigned char) -IMPLEMENT_THTensor_COPY_TO_HALF(Char, char) -IMPLEMENT_THTensor_COPY_TO_HALF(Short, short) -IMPLEMENT_THTensor_COPY_TO_HALF(Int, int) -IMPLEMENT_THTensor_COPY_TO_HALF(Long, long) -IMPLEMENT_THTensor_COPY_TO_HALF(Float, float) -IMPLEMENT_THTensor_COPY_TO_HALF(Double, double) - -#endif /* REAL_IS_HALF */ - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h deleted file mode 100644 index b9e5bfc99..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorCopy.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorCopy.h" -#else - -/* Support for copy between different Tensor types */ - -TH_API void THTensor_(copy)(THTensor *tensor, THTensor *src); -TH_API void THTensor_(copyByte)(THTensor *tensor, struct THByteTensor *src); -TH_API void THTensor_(copyChar)(THTensor *tensor, struct THCharTensor *src); -TH_API void THTensor_(copyShort)(THTensor *tensor, struct THShortTensor *src); -TH_API void THTensor_(copyInt)(THTensor *tensor, struct THIntTensor *src); -TH_API void THTensor_(copyLong)(THTensor *tensor, struct THLongTensor *src); -TH_API void THTensor_(copyFloat)(THTensor *tensor, struct THFloatTensor *src); -TH_API void THTensor_(copyDouble)(THTensor *tensor, struct THDoubleTensor *src); -TH_API void THTensor_(copyHalf)(THTensor *tensor, struct THHalfTensor *src); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c deleted file mode 100644 index d4e52f6d7..000000000 --- 
a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.c +++ /dev/null @@ -1,1121 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorLapack.c" -#else - -/* -Check if self is transpose of a contiguous matrix -*/ -static int THTensor_(isTransposedContiguous)(THTensor *self) -{ - return self->stride[0] == 1 && self->stride[1] == self->size[0]; -} -/* -If a matrix is a regular contiguous matrix, make sure it is transposed -because this is what we return from Lapack calls. -*/ -static void THTensor_(checkTransposed)(THTensor *self) -{ - if(THTensor_(isContiguous)(self)) - THTensor_(transpose)(self, NULL, 0, 1); - return; -} -/* -newContiguous followed by transpose -Similar to (newContiguous), but checks if the transpose of the matrix -is contiguous and also limited to 2D matrices. -*/ -static THTensor *THTensor_(newTransposedContiguous)(THTensor *self) -{ - THTensor *tensor; - if(THTensor_(isTransposedContiguous)(self)) - { - THTensor_(retain)(self); - tensor = self; - } - else - { - tensor = THTensor_(newContiguous)(self); - THTensor_(transpose)(tensor, NULL, 0, 1); - } - - return tensor; -} - -/* -Given the result tensor and src tensor, decide if the lapack call should use the -provided result tensor or should allocate a new space to put the result in. - -The returned tensor have to be freed by the calling function. - -nrows is required, because some lapack calls, require output space smaller than -input space, like underdetermined gels. 
-*/ -static THTensor *THTensor_(checkLapackClone)(THTensor *result, THTensor *src, int nrows) -{ - /* check if user wants to reuse src and if it is correct shape/size */ - if (src == result && THTensor_(isTransposedContiguous)(src) && src->size[1] == nrows) - THTensor_(retain)(result); - else if(src == result || result == NULL) /* in this case, user wants reuse of src, but its structure is not OK */ - result = THTensor_(new)(); - else - THTensor_(retain)(result); - return result; -} - -/* -Same as cloneColumnMajor, but accepts nrows argument, because some lapack calls require -the resulting tensor to be larger than src. -*/ -static THTensor *THTensor_(cloneColumnMajorNrows)(THTensor *self, THTensor *src, int nrows) -{ - THTensor *result; - THTensor *view; - - if (src == NULL) - src = self; - result = THTensor_(checkLapackClone)(self, src, nrows); - if (src == result) - return result; - - THTensor_(resize2d)(result, src->size[1], nrows); - THTensor_(checkTransposed)(result); - - if (src->size[0] == nrows) - THTensor_(copy)(result, src); - else - { - view = THTensor_(newNarrow)(result, 0, 0, src->size[0]); - THTensor_(copy)(view, src); - THTensor_(free)(view); - } - return result; -} - -/* -Create a clone of src in self column major order for use with Lapack. -If src == self, a new tensor is allocated, in any case, the return tensor should be -freed by calling function. 
-*/ -static THTensor *THTensor_(cloneColumnMajor)(THTensor *self, THTensor *src) -{ - return THTensor_(cloneColumnMajorNrows)(self, src, src->size[0]); -} - -void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a) -{ - int free_b = 0; - if (a == NULL) a = ra_; - if (b == NULL) b = rb_; - THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", - a->nDimension); - THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " - "dimensions, but has %d", b->nDimension); - THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld", - a->size[0], a->size[1]); - THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " - "rows, B has %ld", a->size[0], b->size[0]); - - if (b->nDimension == 1) { - b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], - b->stride[0], 1, 0); - free_b = 1; - } - - int n, nrhs, lda, ldb, info; - THIntTensor *ipiv; - THTensor *ra__; // working version of A matrix to be passed into lapack GELS - THTensor *rb__; // working version of B matrix to be passed into lapack GELS - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - rb__ = THTensor_(cloneColumnMajor)(rb_, b); - - n = (int)ra__->size[0]; - nrhs = (int)rb__->size[1]; - lda = n; - ldb = n; - - ipiv = THIntTensor_newWithSize1d((long)n); - THLapack_(gesv)(n, nrhs, - THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), - THTensor_(data)(rb__), ldb, &info); - - THLapackCheckWithCleanup("Lapack Error in %s : U(%d,%d) is zero, singular U.", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(rb__); - THIntTensor_free(ipiv); - if (free_b) THTensor_(free)(b);), - "gesv", info, info); - - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(freeCopyTo)(rb__, rb_); - THIntTensor_free(ipiv); - if (free_b) THTensor_(free)(b); -} - -void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a, - const char *uplo, const char *trans, const char *diag) -{ - int free_b = 0; - if (a == 
NULL) a = ra_; - if (b == NULL) b = rb_; - THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", - a->nDimension); - THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " - "dimensions, but has %d", b->nDimension); - THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld", - a->size[0], a->size[1]); - THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " - "rows, B has %ld", a->size[0], b->size[0]); - - if (b->nDimension == 1) { - b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], - b->stride[0], 1, 0); - free_b = 1; - } - - int n, nrhs, lda, ldb, info; - THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS - THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - rb__ = THTensor_(cloneColumnMajor)(rb_, b); - - n = (int)ra__->size[0]; - nrhs = (int)rb__->size[1]; - lda = n; - ldb = n; - - THLapack_(trtrs)(uplo[0], trans[0], diag[0], n, nrhs, - THTensor_(data)(ra__), lda, - THTensor_(data)(rb__), ldb, &info); - - - THLapackCheckWithCleanup("Lapack Error in %s : A(%d,%d) is zero, singular A", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(rb__); - if (free_b) THTensor_(free)(b);), - "trtrs", info, info); - - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(freeCopyTo)(rb__, rb_); - if (free_b) THTensor_(free)(b); -} - -void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a) -{ - int free_b = 0; - // Note that a = NULL is interpreted as a = ra_, and b = NULL as b = rb_. 
- if (a == NULL) a = ra_; - if (b == NULL) b = rb_; - THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", - a->nDimension); - THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " - "dimensions, but has %d", b->nDimension); - THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " - "rows, B has %ld", a->size[0], b->size[0]); - - if (b->nDimension == 1) { - b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], - b->stride[0], 1, 0); - free_b = 1; - } - - int m, n, nrhs, lda, ldb, info, lwork; - THTensor *work = NULL; - real wkopt = 0; - - THTensor *ra__ = NULL; // working version of A matrix to be passed into lapack GELS - THTensor *rb__ = NULL; // working version of B matrix to be passed into lapack GELS - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - m = ra__->size[0]; - n = ra__->size[1]; - lda = m; - ldb = (m > n) ? m : n; - - rb__ = THTensor_(cloneColumnMajorNrows)(rb_, b, ldb); - - nrhs = rb__->size[1]; - info = 0; - - - /* get optimal workspace size */ - THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda, - THTensor_(data)(rb__), ldb, - &wkopt, -1, &info); - lwork = (int)wkopt; - work = THTensor_(newWithSize1d)(lwork); - THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda, - THTensor_(data)(rb__), ldb, - THTensor_(data)(work), lwork, &info); - - THLapackCheckWithCleanup("Lapack Error in %s : The %d-th diagonal element of the triangular factor of A is zero", - THCleanup(THTensor_(free)(ra__); - THTensor_(free)(rb__); - THTensor_(free)(work); - if (free_b) THTensor_(free)(b);), - "gels", info,""); - - /* rb__ is currently ldb by nrhs; resize it to n by nrhs */ - rb__->size[0] = n; - if (rb__ != rb_) - THTensor_(resize2d)(rb_, n, nrhs); - - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(freeCopyTo)(rb__, rb_); - THTensor_(free)(work); - if (free_b) THTensor_(free)(b); -} - -void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char 
*jobvr) -{ - int n, lda, lwork, info, ldvr; - THTensor *work, *wi, *wr, *a; - real wkopt; - real *rv_data; - long i; - - THTensor *re__ = NULL; - THTensor *rv__ = NULL; - - THArgCheck(a_->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a_->size[0] == a_->size[1], 1,"A should be square"); - - /* we want to definitely clone a_ for geev*/ - a = THTensor_(cloneColumnMajor)(NULL, a_); - - n = a->size[0]; - lda = n; - - wi = THTensor_(newWithSize1d)(n); - wr = THTensor_(newWithSize1d)(n); - - rv_data = NULL; - ldvr = 1; - if (*jobvr == 'V') - { - THTensor_(resize2d)(rv_,n,n); - /* guard against someone passing a correct size, but wrong stride */ - rv__ = THTensor_(newTransposedContiguous)(rv_); - rv_data = THTensor_(data)(rv__); - ldvr = n; - } - THTensor_(resize2d)(re_,n,2); - re__ = THTensor_(newContiguous)(re_); - - /* get optimal workspace size */ - THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi), - NULL, 1, rv_data, ldvr, &wkopt, -1, &info); - - lwork = (int)wkopt; - work = THTensor_(newWithSize1d)(lwork); - - THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi), - NULL, 1, rv_data, ldvr, THTensor_(data)(work), lwork, &info); - - THLapackCheckWithCleanup(" Lapack Error in %s : %d off-diagonal elements of an didn't converge to zero", - THCleanup(THTensor_(free)(re__); - THTensor_(free)(rv__); - THTensor_(free)(a); - THTensor_(free)(wi); - THTensor_(free)(wr); - THTensor_(free)(work);), - "geev", info,""); - - { - real *re_data = THTensor_(data)(re__); - real *wi_data = THTensor_(data)(wi); - real *wr_data = THTensor_(data)(wr); - for (i=0; i<n; i++) - { - re_data[2*i] = wr_data[i]; - re_data[2*i+1] = wi_data[i]; - } - } - - if (*jobvr == 'V') - { - THTensor_(checkTransposed)(rv_); - THTensor_(freeCopyTo)(rv__, rv_); - } - THTensor_(freeCopyTo)(re__, re_); - THTensor_(free)(a); - THTensor_(free)(wi); - THTensor_(free)(wr); - THTensor_(free)(work); -} - 
-void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a, const char *jobz, const char *uplo) -{ - if (a == NULL) a = rv_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1,"A should be square"); - - int n, lda, lwork, info; - THTensor *work; - real wkopt; - - THTensor *rv__ = NULL; - THTensor *re__ = NULL; - - rv__ = THTensor_(cloneColumnMajor)(rv_, a); - - n = rv__->size[0]; - lda = n; - - THTensor_(resize1d)(re_,n); - re__ = THTensor_(newContiguous)(re_); - - /* get optimal workspace size */ - THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda, - THTensor_(data)(re_), &wkopt, -1, &info); - lwork = (int)wkopt; - work = THTensor_(newWithSize1d)(lwork); - THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda, - THTensor_(data)(re_), THTensor_(data)(work), lwork, &info); - - THLapackCheckWithCleanup("Lapack Error %s : %d off-diagonal elements didn't converge to zero", - THCleanup(THTensor_(free)(rv__); - THTensor_(free)(re__); - THTensor_(free)(work);), - "syev", info,""); - - THTensor_(freeCopyTo)(rv__, rv_); - THTensor_(freeCopyTo)(re__, re_); - THTensor_(free)(work); -} - -void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char* jobu) -{ - THTensor *ra_ = THTensor_(new)(); - THTensor_(gesvd2)(ru_, rs_, rv_, ra_, a, jobu); - THTensor_(free)(ra_); -} - -void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char* jobu) -{ - if (a == NULL) a = ra_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - - int k,m, n, lda, ldu, ldvt, lwork, info; - THTensor *work; - THTensor *rvf_ = THTensor_(new)(); - real wkopt; - - THTensor *ra__ = NULL; - THTensor *ru__ = NULL; - THTensor *rs__ = NULL; - THTensor *rv__ = NULL; - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - m = ra__->size[0]; - n = ra__->size[1]; - k = (m < n ? 
m : n); - - lda = m; - ldu = m; - ldvt = n; - - THTensor_(resize1d)(rs_,k); - THTensor_(resize2d)(rvf_,ldvt,n); - if (*jobu == 'A') - THTensor_(resize2d)(ru_,m,ldu); - else - THTensor_(resize2d)(ru_,k,ldu); - - THTensor_(checkTransposed)(ru_); - - /* guard against someone passing a correct size, but wrong stride */ - ru__ = THTensor_(newTransposedContiguous)(ru_); - rs__ = THTensor_(newContiguous)(rs_); - rv__ = THTensor_(newContiguous)(rvf_); - - THLapack_(gesvd)(jobu[0],jobu[0], - m,n,THTensor_(data)(ra__),lda, - THTensor_(data)(rs__), - THTensor_(data)(ru__), - ldu, - THTensor_(data)(rv__), ldvt, - &wkopt, -1, &info); - lwork = (int)wkopt; - work = THTensor_(newWithSize1d)(lwork); - THLapack_(gesvd)(jobu[0],jobu[0], - m,n,THTensor_(data)(ra__),lda, - THTensor_(data)(rs__), - THTensor_(data)(ru__), - ldu, - THTensor_(data)(rv__), ldvt, - THTensor_(data)(work),lwork, &info); - - THLapackCheckWithCleanup(" Lapack Error %s : %d superdiagonals failed to converge.", - THCleanup( - THTensor_(free)(ru__); - THTensor_(free)(rs__); - THTensor_(free)(rv__); - THTensor_(free)(ra__); - THTensor_(free)(work);), - "gesvd", info,""); - - if (*jobu == 'S') - THTensor_(narrow)(rv__,NULL,1,0,k); - - THTensor_(freeCopyTo)(ru__, ru_); - THTensor_(freeCopyTo)(rs__, rs_); - THTensor_(freeCopyTo)(rv__, rvf_); - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(free)(work); - - if (*jobu == 'S') { - THTensor_(narrow)(rvf_,NULL,1,0,k); - } - THTensor_(resizeAs)(rv_, rvf_); - THTensor_(copy)(rv_, rvf_); - THTensor_(free)(rvf_); -} - -void THTensor_(getri)(THTensor *ra_, THTensor *a) -{ - if (a == NULL) a = ra_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); - - int m, n, lda, info, lwork; - real wkopt; - THIntTensor *ipiv; - THTensor *work; - THTensor *ra__ = NULL; - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - m = ra__->size[0]; - n = ra__->size[1]; - lda = m; - ipiv = THIntTensor_newWithSize1d((long)m); 
- - /* Run LU */ - THLapack_(getrf)(n, n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &info); - THLapackCheckWithCleanup("Lapack Error %s : U(%d,%d) is 0, U is singular", - THCleanup( - THTensor_(free)(ra__); - THIntTensor_free(ipiv);), - "getrf", info, info); - - /* Run inverse */ - THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &wkopt, -1, &info); - lwork = (int)wkopt; - work = THTensor_(newWithSize1d)(lwork); - THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), THTensor_(data)(work), lwork, &info); - THLapackCheckWithCleanup("Lapack Error %s : U(%d,%d) is 0, U is singular", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(work); - THIntTensor_free(ipiv);), - "getri", info, info); - - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(free)(work); - THIntTensor_free(ipiv); -} - -void THTensor_(clearUpLoTriangle)(THTensor *a, const char *uplo) -{ - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); - - int n = a->size[0]; - - /* Build full matrix */ - real *p = THTensor_(data)(a); - long i, j; - - /* Upper Triangular Case */ - if (uplo[0] == 'U') - { - /* Clear lower triangle (excluding diagonals) */ - for (i=0; i<n; i++) { - for (j=i+1; j<n; j++) { - p[n*i + j] = 0; - } - } - } - /* Lower Triangular Case */ - else if (uplo[0] == 'L') - { - /* Clear upper triangle (excluding diagonals) */ - for (i=0; i<n; i++) { - for (j=0; j<i; j++) { - p[n*i + j] = 0; - } - } - } -} - -void THTensor_(copyUpLoTriangle)(THTensor *a, const char *uplo) -{ - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); - - int n = a->size[0]; - - /* Build full matrix */ - real *p = THTensor_(data)(a); - long i, j; - - /* Upper Triangular Case */ - if (uplo[0] == 'U') - { - /* Clear lower triangle (excluding diagonals) */ - for (i=0; i<n; i++) { - for (j=i+1; j<n; j++) { - p[n*i + j] = 
p[n*j+i]; - } - } - } - /* Lower Triangular Case */ - else if (uplo[0] == 'L') - { - /* Clear upper triangle (excluding diagonals) */ - for (i=0; i<n; i++) { - for (j=0; j<i; j++) { - p[n*i + j] = p[n*j+i]; - } - } - } -} - -void THTensor_(potrf)(THTensor *ra_, THTensor *a, const char *uplo) -{ - if (a == NULL) a = ra_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); - - int n, lda, info; - THTensor *ra__ = NULL; - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - n = ra__->size[0]; - lda = n; - - /* Run Factorization */ - THLapack_(potrf)(uplo[0], n, THTensor_(data)(ra__), lda, &info); - THLapackCheckWithCleanup("Lapack Error in %s : the leading minor of order %d is not positive definite", - THCleanup(THTensor_(free)(ra__);), - "potrf", info, ""); - - THTensor_(clearUpLoTriangle)(ra__, uplo); - THTensor_(freeCopyTo)(ra__, ra_); -} - -void THTensor_(potrs)(THTensor *rb_, THTensor *b, THTensor *a, const char *uplo) -{ - int free_b = 0; - if (b == NULL) b = rb_; - - THArgCheck(a->nDimension == 2, 2, "A should have 2 dimensions, but has %d", - a->nDimension); - THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, "B should have 1 or 2 " - "dimensions, but has %d", b->nDimension); - THArgCheck(a->size[0] == a->size[1], 2, "A should be square, but is %ldx%ld", - a->size[0], a->size[1]); - THArgCheck(a->size[0] == b->size[0], 2, "A,B size incompatible - A has %ld " - "rows, B has %ld", a->size[0], b->size[0]); - - if (b->nDimension == 1) { - b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0], - b->stride[0], 1, 0); - free_b = 1; - } - - int n, nrhs, lda, ldb, info; - THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS - THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS - - ra__ = THTensor_(cloneColumnMajor)(NULL, a); - rb__ = THTensor_(cloneColumnMajor)(rb_, b); - - n = (int)ra__->size[0]; - nrhs = 
(int)rb__->size[1]; - lda = n; - ldb = n; - - THLapack_(potrs)(uplo[0], n, nrhs, THTensor_(data)(ra__), - lda, THTensor_(data)(rb__), ldb, &info); - - - THLapackCheckWithCleanup("Lapack Error in %s : A(%d,%d) is zero, singular A", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(rb__); - if (free_b) THTensor_(free)(b);), - "potrs", info, info); - - if (free_b) THTensor_(free)(b); - THTensor_(free)(ra__); - THTensor_(freeCopyTo)(rb__, rb_); -} - -void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo) -{ - if (a == NULL) a = ra_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); - - int n, lda, info; - THTensor *ra__ = NULL; - - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - n = ra__->size[0]; - lda = n; - - /* Run inverse */ - THLapack_(potri)(uplo[0], n, THTensor_(data)(ra__), lda, &info); - THLapackCheckWithCleanup("Lapack Error %s : A(%d,%d) is 0, A cannot be factorized", - THCleanup(THTensor_(free)(ra__);), - "potri", info, info); - - THTensor_(copyUpLoTriangle)(ra__, uplo); - THTensor_(freeCopyTo)(ra__, ra_); -} - -/* - Computes the Cholesky factorization with complete pivoting of a real symmetric - positive semidefinite matrix. - - Args: - * `ra_` - result Tensor in which to store the factor U or L from the - Cholesky factorization. - * `rpiv_` - result IntTensor containing sparse permutation matrix P, encoded - as P[rpiv_[k], k] = 1. - * `a` - input Tensor; the input matrix to factorize. - * `uplo` - string; specifies whether the upper or lower triangular part of - the symmetric matrix A is stored. "U"/"L" for upper/lower - triangular. - * `tol` - double; user defined tolerance, or < 0 for automatic choice. - The algorithm terminates when the pivot <= tol. 
- */ -void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor *a, const char *uplo, real tol) { - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - THArgCheck(a->size[0] == a->size[1], 1, "A should be square"); - - int n = a->size[0]; - - THTensor *ra__ = THTensor_(cloneColumnMajor)(ra_, a); - THIntTensor_resize1d(rpiv_, n); - - // Allocate working tensor - THTensor *work = THTensor_(newWithSize1d)(2 * n); - - // Run Cholesky factorization - int lda = n; - int rank, info; - - THLapack_(pstrf)(uplo[0], n, THTensor_(data)(ra__), lda, - THIntTensor_data(rpiv_), &rank, tol, - THTensor_(data)(work), &info); - - THLapackCheckWithCleanup("Lapack Error %s : matrix is rank deficient or not positive semidefinite", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(work);), - "pstrf", info,""); - - THTensor_(clearUpLoTriangle)(ra__, uplo); - - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(free)(work); -} - -/* - Perform a QR decomposition of a matrix. - - In LAPACK, two parts of the QR decomposition are implemented as two separate - functions: geqrf and orgqr. For flexibility and efficiency, these are wrapped - directly, below - but to make the common usage convenient, we also provide - this function, which calls them both and returns the results in a more - intuitive form. - - Args: - * `rq_` - result Tensor in which to store the Q part of the decomposition. - * `rr_` - result Tensor in which to store the R part of the decomposition. - * `a` - input Tensor; the matrix to decompose. - -*/ -void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a) -{ - int m = a->size[0]; - int n = a->size[1]; - int k = (m < n ? 
m : n); - THTensor *ra_ = THTensor_(new)(); - THTensor *rtau_ = THTensor_(new)(); - THTensor *rr__ = THTensor_(new)(); - THTensor_(geqrf)(ra_, rtau_, a); - THTensor_(resize2d)(rr__, k, ra_->size[1]); - THTensor_(narrow)(rr__, ra_, 0, 0, k); - THTensor_(triu)(rr_, rr__, 0); - THTensor_(resize2d)(rq_, ra_->size[0], k); - THTensor_(orgqr)(rq_, ra_, rtau_); - THTensor_(narrow)(rq_, rq_, 1, 0, k); - THTensor_(free)(ra_); - THTensor_(free)(rtau_); - THTensor_(free)(rr__); -} - -/* - The geqrf function does the main work of QR-decomposing a matrix. - However, rather than producing a Q matrix directly, it produces a sequence of - elementary reflectors which may later be composed to construct Q - for example - with the orgqr function, below. - - Args: - * `ra_` - Result matrix which will contain: - i) The elements of R, on and above the diagonal. - ii) Directions of the reflectors implicitly defining Q. - * `rtau_` - Result tensor which will contain the magnitudes of the reflectors - implicitly defining Q. - * `a` - Input matrix, to decompose. If NULL, `ra_` is used as input. - - For further details, please see the LAPACK documentation. - -*/ -void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a) -{ - if (a == NULL) ra_ = a; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - - THTensor *ra__ = NULL; - - /* Prepare the input for LAPACK, making a copy if necessary. */ - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - int m = ra__->size[0]; - int n = ra__->size[1]; - int k = (m < n ? m : n); - int lda = m; - THTensor_(resize1d)(rtau_, k); - - /* Dry-run to query the suggested size of the workspace. */ - int info = 0; - real wkopt = 0; - THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda, - THTensor_(data)(rtau_), - &wkopt, -1, &info); - - /* Allocate the workspace and call LAPACK to do the real work. 
*/ - int lwork = (int)wkopt; - THTensor *work = THTensor_(newWithSize1d)(lwork); - THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda, - THTensor_(data)(rtau_), - THTensor_(data)(work), lwork, &info); - - THLapackCheckWithCleanup("Lapack Error %s : unknown Lapack error. info = %i", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(work);), - "geqrf", info,""); - - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(free)(work); -} - -/* - The orgqr function allows reconstruction of a matrix Q with orthogonal - columns, from a sequence of elementary reflectors, such as is produced by the - geqrf function. - - Args: - * `ra_` - result Tensor, which will contain the matrix Q. - * `a` - input Tensor, which should be a matrix with the directions of the - elementary reflectors below the diagonal. If NULL, `ra_` is used as - input. - * `tau` - input Tensor, containing the magnitudes of the elementary - reflectors. - - For further details, please see the LAPACK documentation. - -*/ -void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau) -{ - if (a == NULL) a = ra_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - - THTensor *ra__ = NULL; - ra__ = THTensor_(cloneColumnMajor)(ra_, a); - - int m = ra__->size[0]; - int n = ra__->size[1]; - int k = tau->size[0]; - int lda = m; - - /* Dry-run to query the suggested size of the workspace. */ - int info = 0; - real wkopt = 0; - THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda, - THTensor_(data)(tau), - &wkopt, -1, &info); - - /* Allocate the workspace and call LAPACK to do the real work. */ - int lwork = (int)wkopt; - THTensor *work = THTensor_(newWithSize1d)(lwork); - THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda, - THTensor_(data)(tau), - THTensor_(data)(work), lwork, &info); - - THLapackCheckWithCleanup(" Lapack Error %s : unknown Lapack error. 
info = %i", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(work);), - "orgqr", info,""); - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(free)(work); -} - -/* - The ormqr function multiplies Q with another matrix from a sequence of - elementary reflectors, such as is produced by the geqrf function. - - Args: - * `ra_` - result Tensor, which will contain the matrix Q' c. - * `a` - input Tensor, which should be a matrix with the directions of the - elementary reflectors below the diagonal. If NULL, `ra_` is used as - input. - * `tau` - input Tensor, containing the magnitudes of the elementary - reflectors. - * `c` - input Tensor, containing the matrix to be multiplied. - * `side` - char, determining whether c is left- or right-multiplied with Q. - * `trans` - char, determining whether to transpose Q before multiplying. - - For further details, please see the LAPACK documentation. - -*/ -void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans) -{ - if (a == NULL) a = ra_; - THArgCheck(a->nDimension == 2, 1, "A should be 2 dimensional"); - - THTensor *ra__ = NULL; - ra__ = THTensor_(cloneColumnMajor)(ra_, c); - - int m = c->size[0]; - int n = c->size[1]; - int k = tau->size[0]; - int lda; - if (*side == 'L') - { - lda = m; - } - else - { - lda = n; - } - int ldc = m; - - /* Dry-run to query the suggested size of the workspace. */ - int info = 0; - real wkopt = 0; - THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda, - THTensor_(data)(tau), THTensor_(data)(ra__), ldc, - &wkopt, -1, &info); - - /* Allocate the workspace and call LAPACK to do the real work. */ - int lwork = (int)wkopt; - THTensor *work = THTensor_(newWithSize1d)(lwork); - THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda, - THTensor_(data)(tau), THTensor_(data)(ra__), ldc, - THTensor_(data)(work), lwork, &info); - - THLapackCheckWithCleanup(" Lapack Error %s : unknown Lapack error. 
info = %i", - THCleanup( - THTensor_(free)(ra__); - THTensor_(free)(work);), - "ormqr", info,""); - THTensor_(freeCopyTo)(ra__, ra_); - THTensor_(free)(work); -} - -void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a) -{ - THArgCheck(THTensor_(nDimension)(a) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(a)); - if (!pivot) { - THError("btrifact without pivoting is not implemented on the CPU"); - } - - if (ra_ != a) { - THTensor_(resizeAs)(ra_, a); - THTensor_(copy)(ra_, a); - } - - int m = a->size[1]; - int n = a->size[2]; - if (m != n) { - THError("btrifact is only implemented for square matrices"); - } - long num_batches = THTensor_(size)(a, 0); - THTensor *ra__; - int lda; - - if (ra_->stride[1] == 1) { - // column ordered, what BLAS wants - lda = ra_->stride[2]; - ra__ = ra_; - } else { - // not column ordered, need to make it such (requires copy) - THTensor *transp_r_ = THTensor_(newTranspose)(ra_, 1, 2); - ra__ = THTensor_(newClone)(transp_r_); - THTensor_(free)(transp_r_); - THTensor_(transpose)(ra__, NULL, 1, 2); - lda = ra__->stride[2]; - } - - THTensor *ai = THTensor_(new)(); - THTensor *rai = THTensor_(new)(); - THIntTensor *rpivoti = THIntTensor_new(); - - int info = 0; - int *info_ptr = &info; - if (rinfo_) { - THIntTensor_resize1d(rinfo_, num_batches); - info_ptr = THIntTensor_data(rinfo_); - } - - THIntTensor_resize2d(rpivots_, num_batches, n); - - long batch = 0; - for (; batch < num_batches; ++batch) { - THTensor_(select)(ai, a, 0, batch); - THTensor_(select)(rai, ra__, 0, batch); - THIntTensor_select(rpivoti, rpivots_, 0, batch); - - THLapack_(getrf)(n, n, THTensor_(data)(rai), lda, - THIntTensor_data(rpivoti), info_ptr); - if (rinfo_) { - info_ptr++; - } else if (info != 0) { - break; - } - } - - THTensor_(free)(ai); - THTensor_(free)(rai); - THIntTensor_free(rpivoti); - - if (ra__ != ra_) { - THTensor_(freeCopyTo)(ra__, ra_); - } - - if (!rinfo_ && info != 0) { - 
THError("failed to factorize batch element %ld (info == %d)", batch, info); - } -} - -void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots) -{ - THArgCheck(THTensor_(nDimension)(atf) == 3, 1, "expected 3D tensor, got %dD", - THTensor_(nDimension)(atf)); - THArgCheck(THTensor_(nDimension)(b) == 3 || - THTensor_(nDimension)(b) == 2, 4, "expected 2D or 3D tensor"); - THArgCheck(THTensor_(size)(atf, 0) == - THTensor_(size)(b, 0), 3, "number of batches must be equal"); - THArgCheck(THTensor_(size)(atf, 1) == - THTensor_(size)(atf, 2), 3, "A matrices must be square"); - THArgCheck(THTensor_(size)(atf, 1) == - THTensor_(size)(b, 1), 3, "dimensions of A and b must be equal"); - - if (rb_ != b) { - THTensor_(resizeAs)(rb_, b); - THTensor_(copy)(rb_, b); - } - - long num_batches = atf->size[0]; - long n = atf->size[1]; - int nrhs = rb_->nDimension > 2 ? rb_->size[2] : 1; - - int lda, ldb; - THTensor *atf_; - THTensor *rb__; - - // correct ordering of A - if (atf->stride[1] == 1) { - // column ordered, what BLAS wants - lda = atf->stride[2]; - atf_ = atf; - } else { - // not column ordered, need to make it such (requires copy) - // it would be nice if we could use the op(A) flags to automatically - // transpose A if needed, but this leads to unpredictable behavior if the - // user clones A_tf later with a different ordering - THTensor *transp_r_ = THTensor_(newTranspose)(atf, 1, 2); - atf_ = THTensor_(newClone)(transp_r_); - THTensor_(free)(transp_r_); - THTensor_(transpose)(atf_, NULL, 1, 2); - lda = atf_->stride[2]; - } - - // correct ordering of B - if (rb_->stride[1] == 1) { - // column ordered - if (rb_->nDimension == 2 || rb_->size[2] == 1) { - ldb = n; - } else { - ldb = rb_->stride[2]; - } - rb__ = rb_; - } else { - // make column ordered - if (rb_->nDimension > 2) { - THTensor *transp_r_ = THTensor_(newTranspose)(rb_, 1, 2); - rb__ = THTensor_(newClone)(transp_r_); - THTensor_(free)(transp_r_); - THTensor_(transpose)(rb__, NULL, 1, 
2); - ldb = rb__->stride[2]; - } else { - rb__ = THTensor_(newClone)(rb_); - ldb = n; - } - } - - THTensor *ai = THTensor_(new)(); - THTensor *rbi = THTensor_(new)(); - THIntTensor *pivoti = THIntTensor_new(); - - if (!THIntTensor_isContiguous(pivots)) { - THError("Error: rpivots_ is not contiguous."); - } - - for (long batch = 0; batch < num_batches; ++batch) { - THTensor_(select)(ai, atf_, 0, batch); - THTensor_(select)(rbi, rb__, 0, batch); - THIntTensor_select(pivoti, pivots, 0, batch); - -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - int info; - THLapack_(getrs)('N', n, nrhs, THTensor_(data)(ai), lda, - THIntTensor_data(pivoti), THTensor_(data)(rbi), - ldb, &info); - if (info != 0) { - THError("Error: Nonzero info."); - } -#else - THError("Unimplemented"); -#endif - } - - THTensor_(free)(ai); - THTensor_(free)(rbi); - THIntTensor_free(pivoti); - - if (atf_ != atf) { - THTensor_(free)(atf_); - } - - if (rb__ != rb_) { - THTensor_(freeCopyTo)(rb__, rb_); - } -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h deleted file mode 100644 index 878594348..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorLapack.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorLapack.h" -#else - -TH_API void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_); -TH_API void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_, const char *uplo, const char *trans, const char *diag); -TH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_); -TH_API void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobz, const char *uplo); -TH_API void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr); -TH_API void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char *jobu); 
-TH_API void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char *jobu); -TH_API void THTensor_(getri)(THTensor *ra_, THTensor *a); -TH_API void THTensor_(potrf)(THTensor *ra_, THTensor *a, const char *uplo); -TH_API void THTensor_(potrs)(THTensor *rb_, THTensor *b_, THTensor *a_, const char *uplo); -TH_API void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo); -TH_API void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a); -TH_API void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a); -TH_API void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau); -TH_API void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans); -TH_API void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor*a, const char* uplo, real tol); - -TH_API void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a); -TH_API void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c deleted file mode 100644 index db7a0cb19..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.c +++ /dev/null @@ -1,3275 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorMath.c" -#else - -#ifndef NAN - #define NAN (nan(NULL)) -#endif - -#ifdef _OPENMP -#include <omp.h> -#endif - -#define TH_OMP_OVERHEAD_THRESHOLD 100000 - -#ifdef _OPENMP - -#ifndef _WIN32 -#define PRAGMA(P) _Pragma(#P) -#else -#define PRAGMA(P) __pragma(P) -#endif - -#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \ -{ \ - ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR); \ - PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \ - { \ - size_t num_threads = omp_get_num_threads(); \ - size_t tid = omp_get_thread_num(); \ - 
ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \ - ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \ - TH_TENSOR_offset + TH_TENSOR_size / num_threads; \ - ptrdiff_t TENSOR##_len = TH_TENSOR_end - TH_TENSOR_offset; \ - TYPE *TENSOR##_data = THTensor_(data)(TENSOR) + TH_TENSOR_offset; \ - CODE \ - } \ -} -#else -#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \ -{ \ - TYPE *TENSOR##_data = THTensor_(data)(TENSOR); \ - ptrdiff_t TENSOR##_len = THTensor_(nElement)(TENSOR); \ - CODE \ -} -#endif - -#ifdef _OPENMP -#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ -{ \ - ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \ - PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \ - { \ - size_t num_threads = omp_get_num_threads(); \ - size_t tid = omp_get_thread_num(); \ - ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \ - ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \ - TH_TENSOR_offset + TH_TENSOR_size / num_threads; \ - ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \ - TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \ - TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \ - CODE \ - } \ -} -#else -#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \ -{ \ - TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \ - TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \ - ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \ - CODE \ -} -#endif - -#ifdef _OPENMP -#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \ -{ \ - ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \ - PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \ - { \ - size_t num_threads = omp_get_num_threads(); \ - size_t tid = omp_get_thread_num(); \ - ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \ - ptrdiff_t TH_TENSOR_end = 
tid == num_threads - 1 ? TH_TENSOR_size : \ - TH_TENSOR_offset + TH_TENSOR_size / num_threads; \ - ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \ - TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \ - TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \ - TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3) + TH_TENSOR_offset; \ - CODE \ - } \ -} -#else -#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \ -{ \ - TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \ - TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \ - TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3); \ - ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \ - CODE \ -} -#endif - -void THTensor_(fill)(THTensor *r_, real value) -{ - if (THTensor_(isContiguous)(r_) || THTensor_(isTransposed)(r_)) { - TH_TENSOR_APPLY_CONTIG(real, r_, THVector_(fill)(r__data, value, r__len);); - } else { - TH_TENSOR_APPLY(real, r_, - if (r__stride == 1) { - THVector_(fill)(r__data, value, r__size); - r__i = r__size; - r__data += r__stride * r__size; - break; - } else { - *r__data = value; - } - ); - } -} - -void THTensor_(zero)(THTensor *r_) -{ - THTensor_(fill)(r_, 0); -} - -void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value) -{ - TH_TENSOR_APPLY2(real, tensor, unsigned char, mask, - if (*mask_data > 1) - { - THFree(mask_counter); - THFree(tensor_counter); - THError("Mask tensor can take 0 and 1 values only"); - } - else if (*mask_data == 1) - { - *tensor_data = value; - }); -} - -void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src ) -{ - THTensor *srct = THTensor_(newContiguous)(src); - real *src_data = THTensor_(data)(srct); - ptrdiff_t cntr = 0; - ptrdiff_t nelem = THTensor_(nElement)(srct); - if (THTensor_(nElement)(tensor) != THByteTensor_nElement(mask)) - { - THTensor_(free)(srct); - THError("Number of elements of destination tensor != Number of elements in mask"); - } - 
TH_TENSOR_APPLY2(real, tensor, unsigned char, mask, - if (*mask_data > 1) - { - THTensor_(free)(srct); - THFree(mask_counter); - THFree(tensor_counter); - THError("Mask tensor can take 0 and 1 values only"); - } - else if (*mask_data == 1) - { - if (cntr == nelem) - { - THTensor_(free)(srct); - THFree(mask_counter); - THFree(tensor_counter); - THError("Number of elements of src < number of ones in mask"); - } - *tensor_data = *src_data; - src_data++; - cntr++; - }); - THTensor_(free)(srct); -} - -void THTensor_(maskedSelect)(THTensor *tensor, THTensor *src, THByteTensor *mask) -{ - ptrdiff_t numel = THByteTensor_sumall(mask); - real *tensor_data; - -#ifdef DEBUG - THAssert(numel <= LONG_MAX); -#endif - THTensor_(resize1d)(tensor,numel); - tensor_data = THTensor_(data)(tensor); - TH_TENSOR_APPLY2(real, src, unsigned char, mask, - if (*mask_data > 1) - { - THFree(mask_counter); - THFree(src_counter); - THError("Mask tensor can take 0 and 1 values only"); - } - else if (*mask_data == 1) - { - *tensor_data = *src_data; - tensor_data++; - }); -} - -// Finds non-zero elements of a tensor and returns their subscripts -void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor) -{ - ptrdiff_t numel = 0; - long *subscript_data; - long i = 0; - long dim; - long div = 1; -#ifdef TH_REAL_IS_HALF -#define IS_NONZERO(val) ((val.x & 0x7fff) != 0) -#else -#define IS_NONZERO(val) ((val)!=0) -#endif - - /* First Pass to determine size of subscripts */ - TH_TENSOR_APPLY(real, tensor, - if IS_NONZERO(*tensor_data) { - ++numel; - }); -#ifdef DEBUG - THAssert(numel <= LONG_MAX); -#endif - THLongTensor_resize2d(subscript, numel, tensor->nDimension); - - /* Second pass populates subscripts */ - subscript_data = THLongTensor_data(subscript); - TH_TENSOR_APPLY(real, tensor, - if IS_NONZERO(*tensor_data) { - div = 1; - - for (dim = tensor->nDimension - 1; dim >= 0; dim--) { - *(subscript_data + dim) = (i/div) % tensor->size[dim]; - div *= tensor->size[dim]; - } - - subscript_data += 
tensor->nDimension; - } - ++i;); -} - -void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index) -{ - ptrdiff_t i, numel; - THLongStorage *newSize; - THTensor *tSlice, *sSlice; - long *index_data; - real *tensor_data, *src_data; - - THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); - THArgCheck(dim < src->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); - THArgCheck(src->nDimension > 0,2,"Source tensor is empty"); - - numel = THLongTensor_nElement(index); - - newSize = THLongStorage_newWithSize(src->nDimension); - THLongStorage_rawCopy(newSize,src->size); -#ifdef DEBUG - THAssert(numel <= LONG_MAX); -#endif - newSize->data[dim] = numel; - THTensor_(resize)(tensor,newSize,NULL); - THLongStorage_free(newSize); - - index = THLongTensor_newContiguous(index); - index_data = THLongTensor_data(index); - - if (dim == 0 && THTensor_(isContiguous)(src) && THTensor_(isContiguous)(tensor)) - { - tensor_data = THTensor_(data)(tensor); - src_data = THTensor_(data)(src); - ptrdiff_t rowsize = THTensor_(nElement)(src) / src->size[0]; - - // check that the indices are within range - long max = src->size[0] - 1 + TH_INDEX_BASE; - for (i=0; i<numel; i++) { - if (index_data[i] < TH_INDEX_BASE || index_data[i] > max) { - THLongTensor_free(index); - THError("index out of range"); - } - } - - if (src->nDimension == 1) { - #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<numel; i++) - tensor_data[i] = src_data[index_data[i] - TH_INDEX_BASE]; - } else { - #pragma omp parallel for if(numel*rowsize > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<numel; i++) - memcpy(tensor_data + i*rowsize, src_data + (index_data[i] - TH_INDEX_BASE)*rowsize, rowsize*sizeof(real)); - } - } - else if (src->nDimension == 1) - { - for (i=0; i<numel; i++) - THTensor_(set1d)(tensor,i,THTensor_(get1d)(src,index_data[i] - TH_INDEX_BASE)); - } - else - { - for (i=0; i<numel; i++) - { 
- tSlice = THTensor_(new)(); - sSlice = THTensor_(new)(); - THTensor_(select)(tSlice, tensor, dim, i); - THTensor_(select)(sSlice, src, dim, index_data[i] - TH_INDEX_BASE); - THTensor_(copy)(tSlice, sSlice); - THTensor_(free)(tSlice); - THTensor_(free)(sSlice); - } - } - - THLongTensor_free(index); -} - -void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src) -{ - ptrdiff_t i, numel; - THTensor *tSlice, *sSlice; - long *index_data; - - numel = THLongTensor_nElement(index); - THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); - THArgCheck(dim < src->nDimension, 4, "Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); - THArgCheck(numel == src->size[dim],4,"Number of indices should be equal to source:size(dim)"); - - index = THLongTensor_newContiguous(index); - index_data = THLongTensor_data(index); - - if (tensor->nDimension > 1 ) - { - tSlice = THTensor_(new)(); - sSlice = THTensor_(new)(); - - for (i=0; i<numel; i++) - { - THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE); - THTensor_(select)(sSlice, src, dim, i); - THTensor_(copy)(tSlice, sSlice); - } - - THTensor_(free)(tSlice); - THTensor_(free)(sSlice); - } - else - { - for (i=0; i<numel; i++) - { - THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, THTensor_(get1d)(src,i)); - } - } - THLongTensor_free(index); -} - -void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src) -{ - ptrdiff_t i, numel; - THTensor *tSlice, *sSlice; - long *index_data; - - numel = THLongTensor_nElement(index); - THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); - THArgCheck(dim < src->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); - THArgCheck(numel == src->size[dim],4,"Number of indices should be equal to source:size(dim)"); - - index = THLongTensor_newContiguous(index); - index_data = THLongTensor_data(index); - - if (tensor->nDimension > 1) - { - 
tSlice = THTensor_(new)(); - sSlice = THTensor_(new)(); - - for (i=0; i<numel; i++) - { - THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE); - THTensor_(select)(sSlice, src, dim, i); - THTensor_(cadd)(tSlice, tSlice, 1.0, sSlice); - } - - THTensor_(free)(tSlice); - THTensor_(free)(sSlice); - } - else - { - for (i=0; i<numel; i++) - { - THTensor_(set1d)(tensor, - index_data[i] - TH_INDEX_BASE, - THTensor_(get1d)(src,i) + THTensor_(get1d)(tensor,index_data[i] - TH_INDEX_BASE)); - } - } - THLongTensor_free(index); -} - -void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val) -{ - ptrdiff_t i, numel; - THTensor *tSlice; - long *index_data; - - numel = THLongTensor_nElement(index); - THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); - THArgCheck(dim < tensor->nDimension, 4,"Indexing dim %d is out of bounds of tensor", dim + TH_INDEX_BASE); - - index = THLongTensor_newContiguous(index); - index_data = THLongTensor_data(index); - - for (i=0; i<numel; i++) - { - if (tensor->nDimension > 1) - { - tSlice = THTensor_(new)(); - THTensor_(select)(tSlice, tensor,dim,index_data[i] - TH_INDEX_BASE); - THTensor_(fill)(tSlice, val); - THTensor_(free)(tSlice); - } - else - { - THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, val); - } - } - THLongTensor_free(index); -} - -void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index) -{ - long elems_per_row, i, idx; - - THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 2, - "Input tensor must have same dimensions as output tensor"); - THArgCheck(dim < THTensor_(nDimension)(tensor), 3, "Index dimension is out of bounds"); - THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(src), 4, - "Index tensor must have same dimensions as input tensor"); - - elems_per_row = THLongTensor_size(index, dim); - - TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim, - for (i = 0; i < elems_per_row; ++i) 
- { - idx = *(index_data + i*index_stride); - if (idx < TH_INDEX_BASE || idx >= src_size + TH_INDEX_BASE) - { - THFree(TH_TENSOR_DIM_APPLY_counter); - THError("Invalid index in gather"); - } - *(tensor_data + i*tensor_stride) = src_data[(idx - TH_INDEX_BASE) * src_stride]; - }) -} - -void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src) -{ - long elems_per_row, i, idx; - - THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds"); - THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3, - "Index tensor must have same dimensions as output tensor"); - THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4, - "Input tensor must have same dimensions as output tensor"); - - elems_per_row = THLongTensor_size(index, dim); - - TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim, - for (i = 0; i < elems_per_row; ++i) - { - idx = *(index_data + i*index_stride); - if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE) - { - THFree(TH_TENSOR_DIM_APPLY_counter); - THError("Invalid index in scatter"); - } - tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = *(src_data + i*src_stride); - }) -} - -void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src) -{ - long elems_per_row, i, idx; - - THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds"); - THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3, - "Index tensor must have same dimensions as output tensor"); - THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4, - "Input tensor must have same dimensions as output tensor"); - - elems_per_row = THLongTensor_size(index, dim); - - TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim, - for (i = 0; i < elems_per_row; ++i) - { - idx = *(index_data + i*index_stride); - if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE) - { - 
THFree(TH_TENSOR_DIM_APPLY_counter); - THError("Invalid index in scatterAdd"); - } - tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] += *(src_data + i*src_stride); - }) -} - -void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val) -{ - long elems_per_row, i, idx; - - THArgCheck(dim < THTensor_(nDimension)(tensor), 2, "Index dimension is out of bounds"); - THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3, - "Index tensor must have same dimensions as output tensor"); - - elems_per_row = THLongTensor_size(index, dim); - - TH_TENSOR_DIM_APPLY2(real, tensor, long, index, dim, - for (i = 0; i < elems_per_row; ++i) - { - idx = *(index_data + i*index_stride); - if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE) - { - THFree(TH_TENSOR_DIM_APPLY_counter); - THError("Invalid index in scatter"); - } - tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = val; - }) -} - -accreal THTensor_(dot)(THTensor *tensor, THTensor *src) -{ - accreal sum = 0; - /* we use a trick here. careful with that. */ - TH_TENSOR_APPLY2(real, tensor, real, src, - long sz = (tensor_size-tensor_i < src_size-src_i ? 
tensor_size-tensor_i : src_size-src_i); - sum += THBlas_(dot)(sz, src_data, src_stride, tensor_data, tensor_stride); - tensor_i += sz; - src_i += sz; - tensor_data += sz*tensor_stride; - src_data += sz*src_stride; - break;); - return sum; -} - - -#undef th_isnan -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) -#define th_isnan(val) \ -(isnan(val)) -#else -#define th_isnan(val) (0) -#endif - -#undef th_isnan_break -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) -#define th_isnan_break(val) \ -if (isnan(val)) break; -#else -#define th_isnan_break(val) -#endif - -real THTensor_(minall)(THTensor *tensor) -{ - real theMin; - real value; - - THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension"); - theMin = THTensor_(data)(tensor)[0]; - TH_TENSOR_APPLY(real, tensor, - value = *tensor_data; - /* This is not the same as value<theMin in the case of NaNs */ - if(!(value >= theMin)) - { - theMin = value; - th_isnan_break(value) - }); - return theMin; -} - -real THTensor_(maxall)(THTensor *tensor) -{ - real theMax; - real value; - - THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension"); - theMax = THTensor_(data)(tensor)[0]; - TH_TENSOR_APPLY(real, tensor, - value = *tensor_data; - /* This is not the same as value>theMax in the case of NaNs */ - if(!(value <= theMax)) - { - theMax = value; - th_isnan_break(value) - }); - return theMax; -} - -static void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride); - -real THTensor_(medianall)(THTensor *tensor) -{ - THArgCheck(tensor->nDimension > 0, 1, "tensor must have one dimension"); - - real theMedian; - ptrdiff_t numel; - long k; - THTensor *temp_; - real *temp__data; - - numel = THTensor_(nElement)(tensor); - k = (numel-1) >> 1; - - temp_ = THTensor_(newClone)(tensor); - temp__data = THTensor_(data)(temp_); - - THTensor_(quickselectnoidx)(temp__data, k, numel, 1); - - theMedian = temp__data[k]; - - THTensor_(free)(temp_); - - return theMedian; -} 
- -accreal THTensor_(sumall)(THTensor *tensor) -{ - accreal sum = 0; - TH_TENSOR_APPLY(real, tensor, sum += *tensor_data;); - return sum; -} - -accreal THTensor_(prodall)(THTensor *tensor) -{ - accreal prod = 1; - TH_TENSOR_APPLY(real, tensor, prod *= *tensor_data;); - return prod; -} - -void THTensor_(add)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len);); - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;); - } -} - -void THTensor_(sub)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(add)(r_, t, -value); -} - -void THTensor_(mul)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len);); - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;); - } -} - -void THTensor_(div)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len);); - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / value;); - } -} - -void THTensor_(lshift)(THTensor *r_, THTensor *t, real value) -{ -#if defined(TH_REAL_IS_FLOAT) - return THTensor_(mul)(r_, t, powf(2, value)); -#elif defined(TH_REAL_IS_DOUBLE) - return THTensor_(mul)(r_, t, pow(2, value)); -#elif defined(TH_REAL_IS_HALF) - return THError("lshift is not supported for torch.HalfTensor"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - 
THTensor_(isContiguous)(t) && - THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - long sz = THTensor_(nElement)(t); - long i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_BYTE) - rp[i] = ((real) tp[i]) << value; -#else - rp[i] = ((unsigned real) tp[i]) << value; -#endif - } - } else { -#if defined(TH_REAL_IS_BYTE) - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) << value);); -#else - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) << value);); -#endif - } -#endif -} - -void THTensor_(rshift)(THTensor *r_, THTensor *t, real value) -{ -#if defined(TH_REAL_IS_FLOAT) - return THTensor_(div)(r_, t, powf(2, value)); -#elif defined(TH_REAL_IS_DOUBLE) - return THTensor_(div)(r_, t, pow(2, value)); -#elif defined(TH_REAL_IS_HALF) - return THError("rshift is not supported for torch.HalfTensor"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - long sz = THTensor_(nElement)(t); - long i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_BYTE) - rp[i] = ((real) tp[i]) >> value; -#else - rp[i] = ((unsigned real) tp[i]) >> value; -#endif - } - } else { -#if defined(TH_REAL_IS_BYTE) - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) >> value);); -#else - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) >> value);); -#endif - } -#endif -} - -void THTensor_(fmod)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - - real *tp = THTensor_(data)(t); - real 
*rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - rp[i] = fmod(tp[i], value); -#else - rp[i] = tp[i] % value; -#endif - } - } else { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = fmod(*t_data, value);); -#else - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data % value);); -#endif - } -} - -void THTensor_(remainder)(THTensor *r_, THTensor *t, real value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - rp[i] = (value == 0)? NAN : tp[i] - value * floor(tp[i] / value); -#else - // There is no NAN for integers - rp[i] = tp[i] % value; - if (rp[i] * value < 0) - rp[i] += value; -#endif - } - } else { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (value == 0)? 
NAN : *t_data - value * floor(*t_data / value);); -#else - // There is no NAN for integers - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data % value; - if (*r__data * value < 0) *r__data += value;); -#endif - } -} - -void THTensor_(bitand)(THTensor *r_, THTensor *t, real value) -{ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) - return THError("bitand is only supported for integer type tensors"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - long sz = THTensor_(nElement)(t); - long i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) - for (i=0; i<sz; i++) { - rp[i] = tp[i] & value; - } - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data & value;); - } -#endif -} - -void THTensor_(bitor)(THTensor *r_, THTensor *t, real value) -{ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) - return THError("bitor is only supported for integer type tensors"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - long sz = THTensor_(nElement)(t); - long i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) - for (i=0; i<sz; i++) { - rp[i] = tp[i] | value; - } - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data | value;); - } -#endif -} - -void THTensor_(bitxor)(THTensor *r_, THTensor *t, real value) -{ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) - return THError("bitxor is only supported for integer type tensors"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - 
THTensor_(isContiguous)(t) && - THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - long sz = THTensor_(nElement)(t); - long i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i) - for (i=0; i<sz; i++) { - rp[i] = tp[i] ^ value; - } - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data ^ value;); - } -#endif -} - -void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - /* real t_val; */ - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) - rp[i] = (tp[i] < min_value) ? min_value : (tp[i] > max_value ? max_value : tp[i]); - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? 
max_value : *t_data);); - } -} - -void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - if(r_ == t) { - THBlas_(axpy)(THTensor_(nElement)(t), value, THTensor_(data)(src), 1, THTensor_(data)(r_), 1); - } else { - TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cadd)(r__data, t_data, src_data, value, r__len);); - } - } else { - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data + value * *src_data;); - } -} - -void THTensor_(csub)(THTensor *r_, THTensor *t, real value,THTensor *src) -{ - THTensor_(cadd)(r_, t, -value, src); -} - -void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cmul)(r__data, t_data, src_data, r__len);); - } else { - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * *src_data;); - } -} - -void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) - rp[i] = pow(tp[i], sp[i]); - } else { - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = pow(*t_data, *src_data);); - } -} - -void THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src) -{ - THTensor_(resizeAs)(r_, t); - if 
(THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cdiv)(r__data, t_data, src_data, r__len);); - } else { - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / *src_data;); - } -} - -void THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src) -{ -#if defined(TH_REAL_IS_HALF) - return THError("clshift is not supported for torch.HalfTensor"); -#endif - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(isContiguous)(src) && - THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_FLOAT) - rp[i] = tp[i] * powf(2, sp[i]); -#elif defined(TH_REAL_IS_DOUBLE) - rp[i] = tp[i] * pow(2, sp[i]); -#elif defined(TH_REAL_IS_BYTE) - rp[i] = ((real) tp[i]) << sp[i]; -#else - rp[i] = ((unsigned real) tp[i]) << sp[i]; -#endif - } - } else { -#if defined(TH_REAL_IS_FLOAT) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * powf(2, *src_data);); -#elif defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * pow(2, *src_data);); -#elif defined(TH_REAL_IS_BYTE) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) << *src_data;); -#else - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) << *src_data;); -#endif - } -} - -void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src) -{ -#if defined(TH_REAL_IS_HALF) - return THError("crshift is not supported for torch.HalfTensor"); -#endif - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - 
THTensor_(isContiguous)(t) && - THTensor_(isContiguous)(src) && - THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_FLOAT) - rp[i] = tp[i] / powf(2, sp[i]); -#elif defined(TH_REAL_IS_DOUBLE) - rp[i] = tp[i] / pow(2, sp[i]); -#elif defined(TH_REAL_IS_BYTE) - rp[i] = ((real) tp[i]) >> sp[i]; -#else - rp[i] = ((unsigned real) tp[i]) >> sp[i]; -#endif - } - } else { -#if defined(TH_REAL_IS_FLOAT) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / powf(2, *src_data);); -#elif defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / pow(2, *src_data);); -#elif defined(TH_REAL_IS_BYTE) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) >> *src_data;); -#else - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) >> *src_data;); -#endif - } -} - -void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - rp[i] = fmod(tp[i], sp[i]); -#else - rp[i] = tp[i] % sp[i]; -#endif - } - } else { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = fmod(*t_data, *src_data);); -#else - TH_TENSOR_APPLY3(real, r_, real, t, 
real, src, *r__data = (*t_data % *src_data);); -#endif - - } -} - -void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - rp[i] = (sp[i] == 0)? NAN : tp[i] - sp[i] * floor(tp[i] / sp[i]); -#else - // There is no NAN for integers - rp[i] = tp[i] % sp[i]; - if (rp[i] * sp[i] < 0) - rp[i] += sp[i]; -#endif - } - } else { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = (*src_data == 0)? NAN : *t_data - *src_data * floor(*t_data / *src_data);); -#else - // There is no NAN for integers - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data % *src_data; - if (*r__data * *src_data < 0) *r__data += *src_data;); -#endif - - } -} - -void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src) -{ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) - return THError("cbitand is only supported for integer type tensors"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(isContiguous)(src) && - THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { - rp[i] = tp[i] & sp[i]; - } - } else { - TH_TENSOR_APPLY3(real, r_, real, t, 
real, src, *r__data = *t_data & *src_data;); - } -#endif -} - -void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src) -{ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) - return THError("cbitor is only supported for integer type tensors"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(isContiguous)(src) && - THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { - rp[i] = tp[i] | sp[i]; - } - } else { - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data | *src_data;); - } -#endif -} - -void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src) -{ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) - return THError("cbitxor is only supported for integer type tensors"); -#else - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && - THTensor_(isContiguous)(t) && - THTensor_(isContiguous)(src) && - THTensor_(nElement)(r_) == THTensor_(nElement)(src)) { - real *tp = THTensor_(data)(t); - real *sp = THTensor_(data)(src); - real *rp = THTensor_(data)(r_); - ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) { - rp[i] = tp[i] ^ sp[i]; - } - } else { - TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data ^ *src_data;); - } -#endif -} - -void THTensor_(tpow)(THTensor *r_, real value, THTensor *t) -{ - THTensor_(resizeAs)(r_, t); - if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) { - real *tp = THTensor_(data)(t); - real *rp = THTensor_(data)(r_); - 
ptrdiff_t sz = THTensor_(nElement)(t); - ptrdiff_t i; - #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i) - for (i=0; i<sz; i++) - rp[i] = pow(value, tp[i]); - } else { - TH_TENSOR_APPLY2(real, r_, real, t, *r__data = pow(value, *t_data);); - } -} - -void THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2) -{ - if(r_ != t) - { - THTensor_(resizeAs)(r_, t); - THTensor_(copy)(r_, t); - } - - TH_TENSOR_APPLY3(real, r_, real, src1, real, src2, *r__data += value * *src1_data * *src2_data;); -} - - -void THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2) -{ - if(r_ != t) - { - THTensor_(resizeAs)(r_, t); - THTensor_(copy)(r_, t); - } - - TH_TENSOR_APPLY3(real, r_, real, src1, real, src2, *r__data += value * *src1_data / *src2_data;); -} - -void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat, THTensor *vec) -{ - if( (mat->nDimension != 2) || (vec->nDimension != 1) ) - THError("matrix and vector expected, got %dD, %dD", - mat->nDimension, vec->nDimension); - - if( mat->size[1] != vec->size[0] ) { - THDescBuff bm = THTensor_(sizeDesc)(mat); - THDescBuff bv = THTensor_(sizeDesc)(vec); - THError("size mismatch, %s, %s", bm.str, bv.str); - } - - if(t->nDimension != 1) - THError("vector expected, got t: %dD", t->nDimension); - - if(t->size[0] != mat->size[0]) { - THDescBuff bt = THTensor_(sizeDesc)(t); - THDescBuff bm = THTensor_(sizeDesc)(mat); - THError("size mismatch, t: %s, mat: %s", bt.str, bm.str); - } - - if(r_ != t) - { - THTensor_(resizeAs)(r_, t); - THTensor_(copy)(r_, t); - } - - if(mat->stride[0] == 1) - { - THBlas_(gemv)('n', mat->size[0], mat->size[1], - alpha, THTensor_(data)(mat), mat->stride[1], - THTensor_(data)(vec), vec->stride[0], - beta, THTensor_(data)(r_), r_->stride[0]); - } - else if(mat->stride[1] == 1) - { - THBlas_(gemv)('t', mat->size[1], mat->size[0], - alpha, THTensor_(data)(mat), mat->stride[0], - 
THTensor_(data)(vec), vec->stride[0], - beta, THTensor_(data)(r_), r_->stride[0]); - } - else - { - THTensor *cmat = THTensor_(newContiguous)(mat); - - THBlas_(gemv)('t', mat->size[1], mat->size[0], - alpha, THTensor_(data)(cmat), cmat->stride[0], - THTensor_(data)(vec), vec->stride[0], - beta, THTensor_(data)(r_), r_->stride[0]); - - THTensor_(free)(cmat); - } -} - -void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain) -{ - long N1 = m1->size[0]; - long N2 = m2->size[0]; - long dim; - real *m1_p; - real *m2_p; - real *r_p; - long i; - - THTensor_(resize2d)(r_, N1, N2); - - m1 = THTensor_(newContiguous)(m1); - m2 = THTensor_(newContiguous)(m2); - - THTensor_(resize2d)(m1, N1, THTensor_(nElement)(m1) / N1); - THTensor_(resize2d)(m2, N2, THTensor_(nElement)(m2) / N2); - - dim = m1->size[1]; - THArgCheck(m1->size[1] == m2->size[1], 3, "m1 and m2 must have the same inner vector dim"); - - m1_p = THTensor_(data)(m1); - m2_p = THTensor_(data)(m2); - r_p = THTensor_(data)(r_); - -#pragma omp parallel for private(i) - for (i=0; i<N1; i++) { - long j,k; - for (j=0; j<N2; j++) { - real sum = 0; - for (k=0; k<dim; k++) { - real term = m1_p[ i*dim + k ] - m2_p[ j*dim + k ]; - sum += term*term; - } - r_p[ i*N2 + j ] = gain * sum; - } - } - - THTensor_(free)(m1); - THTensor_(free)(m2); -} - -void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *m1, THTensor *m2) -{ - char transpose_r, transpose_m1, transpose_m2; - THTensor *r__, *m1_, *m2_; - - if( (m1->nDimension != 2) || (m2->nDimension != 2)) - THError("matrices expected, got %dD, %dD tensors", m1->nDimension, m2->nDimension); - - if(m1->size[1] != m2->size[0]) { - THDescBuff bm1 = THTensor_(sizeDesc)(m1); - THDescBuff bm2 = THTensor_(sizeDesc)(m2); - THError("size mismatch, m1: %s, m2: %s", bm1.str, bm2.str); - } - - if( t->nDimension != 2 ) - THError("matrix expected, got %dD tensor for t", t->nDimension); - - if( (t->size[0] != m1->size[0]) || (t->size[1] != m2->size[1]) ) { 
- THDescBuff bt = THTensor_(sizeDesc)(t); - THDescBuff bm1 = THTensor_(sizeDesc)(m1); - THDescBuff bm2 = THTensor_(sizeDesc)(m2); - THError("size mismatch, t: %s, m1: %s, m2: %s", bt.str, bm1.str, bm2.str); - } - - if(t != r_) - { - THTensor_(resizeAs)(r_, t); - THTensor_(copy)(r_, t); - } - - /* r_ */ - if(r_->stride[0] == 1 && - r_->stride[1] != 0) - { - transpose_r = 'n'; - r__ = r_; - } - else if(r_->stride[1] == 1 && - r_->stride[0] != 0) - { - THTensor *swap = m2; - m2 = m1; - m1 = swap; - transpose_r = 't'; - r__ = r_; - } - else - { - transpose_r = 'n'; - - THTensor *transp_r_ = THTensor_(newTranspose)(r_, 0, 1); - r__ = THTensor_(newClone)(transp_r_); - THTensor_(free)(transp_r_); - THTensor_(transpose)(r__, NULL, 0, 1); - } - - /* m1 */ - if(m1->stride[(transpose_r == 'n' ? 0 : 1)] == 1 && - m1->stride[(transpose_r == 'n' ? 1 : 0)] != 0) - { - transpose_m1 = 'n'; - m1_ = m1; - } - else if(m1->stride[(transpose_r == 'n' ? 1 : 0)] == 1 && - m1->stride[(transpose_r == 'n' ? 0 : 1)] != 0) - { - transpose_m1 = 't'; - m1_ = m1; - } - else - { - transpose_m1 = (transpose_r == 'n' ? 't' : 'n'); - m1_ = THTensor_(newContiguous)(m1); - } - - /* m2 */ - if(m2->stride[(transpose_r == 'n' ? 0 : 1)] == 1 && - m2->stride[(transpose_r == 'n' ? 1 : 0)] != 0) - { - transpose_m2 = 'n'; - m2_ = m2; - } - else if(m2->stride[(transpose_r == 'n' ? 1 : 0)] == 1 && - m2->stride[(transpose_r == 'n' ? 0 : 1)] != 0) - { - transpose_m2 = 't'; - m2_ = m2; - } - else - { - transpose_m2 = (transpose_r == 'n' ? 't' : 'n'); - m2_ = THTensor_(newContiguous)(m2); - } - -#pragma omp critical(blasgemm) - /* do the operation */ - THBlas_(gemm)(transpose_m1, - transpose_m2, - r__->size[(transpose_r == 'n' ? 0 : 1)], - r__->size[(transpose_r == 'n' ? 1 : 0)], - m1_->size[(transpose_r == 'n' ? 1 : 0)], - alpha, - THTensor_(data)(m1_), - (transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]), - THTensor_(data)(m2_), - (transpose_m2 == 'n' ? 
m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]), - beta, - THTensor_(data)(r__), - r__->stride[(transpose_r == 'n' ? 1 : 0)]); - - /* free intermediate variables */ - if(m1_ != m1) - THTensor_(free)(m1_); - - if(m2_ != m2) - THTensor_(free)(m2_); - - if(r__ != r_) - THTensor_(freeCopyTo)(r__, r_); -} - -void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2) -{ - if( (vec1->nDimension != 1) || (vec2->nDimension != 1) ) - THError("vector and vector expected, got %dD, %dD tensors", - vec1->nDimension, vec2->nDimension); - - if(t->nDimension != 2) - THError("expected matrix, got %dD tensor for t", t->nDimension); - - if( (t->size[0] != vec1->size[0]) || (t->size[1] != vec2->size[0]) ) { - THDescBuff bt = THTensor_(sizeDesc)(t); - THDescBuff bv1 = THTensor_(sizeDesc)(vec1); - THDescBuff bv2 = THTensor_(sizeDesc)(vec2); - THError("size mismatch, t: %s, vec1: %s, vec2: %s", bt.str, bv1.str, bv2.str); - } - - if(r_ != t) - { - THTensor_(resizeAs)(r_, t); - THTensor_(copy)(r_, t); - } - - if(beta == 0) { - THTensor_(zero)(r_); - } - else if(beta != 1) - THTensor_(mul)(r_, r_, beta); - - if(r_->stride[0] == 1) - { - THBlas_(ger)(vec1->size[0], vec2->size[0], - alpha, THTensor_(data)(vec1), vec1->stride[0], - THTensor_(data)(vec2), vec2->stride[0], - THTensor_(data)(r_), r_->stride[1]); - } - else if(r_->stride[1] == 1) - { - THBlas_(ger)(vec2->size[0], vec1->size[0], - alpha, THTensor_(data)(vec2), vec2->stride[0], - THTensor_(data)(vec1), vec1->stride[0], - THTensor_(data)(r_), r_->stride[0]); - } - else - { - THTensor *cr = THTensor_(newClone)(r_); - - THBlas_(ger)(vec2->size[0], vec1->size[0], - alpha, THTensor_(data)(vec2), vec2->stride[0], - THTensor_(data)(vec1), vec1->stride[0], - THTensor_(data)(cr), cr->stride[0]); - - THTensor_(freeCopyTo)(cr, r_); - } -} - -void THTensor_(addbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2) -{ - long 
batch; - - THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, "expected 3D tensor"); - THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, "expected 3D tensor"); - THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2, - "equal number of batches expected, got %d, %d", - THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0)); - THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2, - "wrong matrix size, batch1: %dx%d, batch2: %dx%d", - THTensor_(size)(batch1, 1), THTensor_(size)(batch1,2), - THTensor_(size)(batch2, 1), THTensor_(size)(batch2,2)); - - long dim1 = THTensor_(size)(batch1, 1); - long dim2 = THTensor_(size)(batch2, 2); - THArgCheck(THTensor_(size)(t, 0) == dim1, 1, "output tensor of incorrect size"); - THArgCheck(THTensor_(size)(t, 1) == dim2, 1, "output tensor of incorrect size"); - - if (t != result) { - THTensor_(resizeAs)(result, t); - THTensor_(copy)(result, t); - } - - THTensor *matrix1 = THTensor_(new)(); - THTensor *matrix2 = THTensor_(new)(); - - for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) { - THTensor_(select)(matrix1, batch1, 0, batch); - THTensor_(select)(matrix2, batch2, 0, batch); - - THTensor_(addmm)(result, beta, result, alpha, matrix1, matrix2); - beta = 1; // accumulate output once - } - - THTensor_(free)(matrix1); - THTensor_(free)(matrix2); -} - -void THTensor_(baddbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2) -{ - long batch; - - THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, "expected 3D tensor, got %dD", THTensor_(nDimension)(batch1)); - THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, "expected 3D tensor, got %dD", THTensor_(nDimension)(batch2)); - THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2, - "equal number of batches expected, got %d, %d", - THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0)); - THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2, - "wrong matrix size, batch1: 
%dx%d, batch2: %dx%d", - THTensor_(size)(batch1, 1), THTensor_(size)(batch1, 2), - THTensor_(size)(batch2, 1), THTensor_(size)(batch2, 2)); - - long bs = THTensor_(size)(batch1, 0); - long dim1 = THTensor_(size)(batch1, 1); - long dim2 = THTensor_(size)(batch2, 2); - THArgCheck(THTensor_(size)(t, 0) == bs, 1, "output tensor of incorrect size"); - THArgCheck(THTensor_(size)(t, 1) == dim1, 1, "output tensor of incorrect size"); - THArgCheck(THTensor_(size)(t, 2) == dim2, 1, "output tensor of incorrect size"); - - if (t != result) { - THTensor_(resizeAs)(result, t); - THTensor_(copy)(result, t); - } - - THTensor *matrix1 = THTensor_(new)(); - THTensor *matrix2 = THTensor_(new)(); - THTensor *result_matrix = THTensor_(new)(); - - for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) { - THTensor_(select)(matrix1, batch1, 0, batch); - THTensor_(select)(matrix2, batch2, 0, batch); - THTensor_(select)(result_matrix, result, 0, batch); - - THTensor_(addmm)(result_matrix, beta, result_matrix, alpha, matrix1, matrix2); - } - - THTensor_(free)(matrix1); - THTensor_(free)(matrix2); - THTensor_(free)(result_matrix); -} - -ptrdiff_t THTensor_(numel)(THTensor *t) -{ - return THTensor_(nElement)(t); -} - -void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(values_, dim, NULL); - THLongTensor_resize(indices_, dim, NULL); - THLongStorage_free(dim); - - // two implementations optimized for data locality - if (t->stride[dimension] == 1) { - real theMax; - real value; - long theIndex; - long i; - TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, - theMax = t_data[0]; - theIndex = 0; - - for(i = 0; i < t_size; i++) - { - value = t_data[i*t_stride]; - /* This 
is not the same as value>theMax in the case of NaNs */ - if(!(value <= theMax)) - { - theIndex = i; - theMax = value; - th_isnan_break(value) - } - } - *indices__data = theIndex; - *values__data = theMax;); - } else { - if (THTensor_(nDimension)(t) > 1) { - THTensor *t0 = THTensor_(newSelect)(t, dimension, 0); - THTensor_(copy)(values_, t0); - THTensor_(free)(t0); - } else { - THTensor_(fill)(values_, THTensor_(get1d)(t, 0)); - } - THLongTensor_zero(indices_); - - if(t->size[dimension] == 1) { - return; - } - - THTensor *tempValues_ = THTensor_(newWithTensor)(values_); - // tempValues_.expand_as(t) - tempValues_->size[dimension] = t->size[dimension]; - tempValues_->stride[dimension] = 0; - - THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_); - // tempIndices_.expand_as(t) - tempIndices_->size[dimension] = t->size[dimension]; - tempIndices_->stride[dimension] = 0; - - TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension, - if(!(*t_data <= *tempValues__data) && !th_isnan(*tempValues__data)) { - *tempValues__data = *t_data; - *tempIndices__data = *tempIndices__dimOffset; - }); - - THTensor_(free)(tempValues_); - THLongTensor_free(tempIndices_); - } - - if (!keepdim) { - THTensor_(squeeze1d)(values_, values_, dimension); - THLongTensor_squeeze1d(indices_, indices_, dimension); - } -} - -void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(values_, dim, NULL); - THLongTensor_resize(indices_, dim, NULL); - THLongStorage_free(dim); - - // two implementations optimized for data locality - if (t->stride[dimension] == 1) { - real theMax; - real value; - long theIndex; - long i; - TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, 
indices_, dimension, - theMax = t_data[0]; - theIndex = 0; - - for(i = 0; i < t_size; i++) - { - value = t_data[i*t_stride]; - /* This is not the same as value>theMax in the case of NaNs */ - if(!(value >= theMax)) - { - theIndex = i; - theMax = value; - th_isnan_break(value) - } - } - *indices__data = theIndex; - *values__data = theMax;); - } else { - if (THTensor_(nDimension)(t) > 1) { - THTensor *t0 = THTensor_(newSelect)(t, dimension, 0); - THTensor_(copy)(values_, t0); - THTensor_(free)(t0); - } else { - THTensor_(fill)(values_, THTensor_(get1d)(t, 0)); - } - THLongTensor_zero(indices_); - - if(t->size[dimension] == 1) { - return; - } - - THTensor *tempValues_ = THTensor_(newWithTensor)(values_); - // tempValues_.expand_as(t) - tempValues_->size[dimension] = t->size[dimension]; - tempValues_->stride[dimension] = 0; - - THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_); - // tempIndices_.expand_as(t) - tempIndices_->size[dimension] = t->size[dimension]; - tempIndices_->stride[dimension] = 0; - - TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension, - if(!(*t_data >= *tempValues__data) && !th_isnan(*tempValues__data)) { - *tempValues__data = *t_data; - *tempIndices__data = *tempIndices__dimOffset; - }); - } - - if (!keepdim) { - THTensor_(squeeze1d)(values_, values_, dimension); - THLongTensor_squeeze1d(indices_, indices_, dimension); - } -} - - -void THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(r_, dim, NULL); - THLongStorage_free(dim); - - // two implementations optimized for data locality - if (t->stride[dimension] == 1) { - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal sum = 0; - long i; - for(i = 0; i < t_size; i++) - sum += 
t_data[i*t_stride]; - *r__data = (real)sum;); - } else { - THTensor_(zero)(r_); - THTensor *temp_ = THTensor_(newWithTensor)(r_); - // r_.expand_as(t) - temp_->size[dimension] = t->size[dimension]; - temp_->stride[dimension] = 0; - - TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data + *t_data;); - THTensor_(free)(temp_); - } - - if (!keepdim) { - THTensor_(squeeze1d)(r_, r_, dimension); - } -} - -void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(r_, dim, NULL); - THLongStorage_free(dim); - - // two implementations optimized for data locality - if (t->stride[dimension] == 1) { - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal prod = 1; - long i; - for(i = 0; i < t_size; i++) - prod *= t_data[i*t_stride]; - *r__data = (real)prod;); - } else { - THTensor_(fill)(r_, 1); - THTensor *temp_ = THTensor_(newWithTensor)(r_); - // r_.expand_as(t) - temp_->size[dimension] = t->size[dimension]; - temp_->stride[dimension] = 0; - - TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data * *t_data;); - THTensor_(free)(temp_); - } - - if (!keepdim) { - THTensor_(squeeze1d)(r_, r_, dimension); - } -} - -void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension) -{ - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range", - dimension + TH_INDEX_BASE); - - THTensor_(resizeAs)(r_, t); - - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal cumsum = 0; - long i; - for(i = 0; i < t_size; i++) - { - cumsum += t_data[i*t_stride]; - r__data[i*r__stride] = (real)cumsum; - }); -} - -void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension) -{ - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, 
"dimension %d out of range", - dimension + TH_INDEX_BASE); - - THTensor_(resizeAs)(r_, t); - - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal cumprod = 1; - long i; - for(i = 0; i < t_size; i++) - { - cumprod *= t_data[i*t_stride]; - r__data[i*r__stride] = (real)cumprod; - }); -} - - -void THTensor_(sign)(THTensor *r_, THTensor *t) -{ - THTensor_(resizeAs)(r_, t); - -#if defined (TH_REAL_IS_BYTE) - TH_TENSOR_APPLY2(real, r_, real, t, - if (*t_data > 0) *r__data = 1; - else *r__data = 0;); -#else - TH_TENSOR_APPLY2(real, r_, real, t, - if (*t_data > 0) *r__data = 1; - else if (*t_data < 0) *r__data = -1; - else *r__data = 0;); -#endif -} - - -accreal THTensor_(trace)(THTensor *t) -{ - real *t_data = THTensor_(data)(t); - accreal sum = 0; - long i = 0; - long t_stride_0, t_stride_1, t_diag_size; - - THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix"); - - t_stride_0 = THTensor_(stride)(t, 0); - t_stride_1 = THTensor_(stride)(t, 1); - t_diag_size = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)); - while(i < t_diag_size) - { - sum += t_data[i*(t_stride_0+t_stride_1)]; - i++; - } - - return sum; -} - -void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension) -{ - int i; - - if(THTensor_(nDimension)(a) != THTensor_(nDimension)(b)) - THError("inconsistent tensor dimension %dD, %dD", - THTensor_(nDimension)(a), THTensor_(nDimension)(b)); - - for(i = 0; i < THTensor_(nDimension)(a); i++) - { - if(THTensor_(size)(a, i) != THTensor_(size)(b, i)) { - THDescBuff ba = THTensor_(sizeDesc)(a); - THDescBuff bb = THTensor_(sizeDesc)(b); - THError("inconsistent tensor sizes %s, %s", ba.str, bb.str); - } - } - - if(dimension < 0) - { - for(i = 0; i < THTensor_(nDimension)(a); i++) - { - if(THTensor_(size)(a, i) == 3) - { - dimension = i; - break; - } - } - if(dimension < 0) { - THDescBuff ba = THTensor_(sizeDesc)(a); - THError("no dimension of size 3 in a: %s", ba.str); - } - } - - THArgCheck(dimension >= 0 && dimension < 
THTensor_(nDimension)(a), 3, "dimension %d out of range", - dimension + TH_INDEX_BASE); - THArgCheck(THTensor_(size)(a, dimension) == 3, 3, "dimension %d does not have size 3", - dimension + TH_INDEX_BASE); - - THTensor_(resizeAs)(r_, a); - - TH_TENSOR_DIM_APPLY3(real, a, real, b, real, r_, dimension, - r__data[0*r__stride] = a_data[1*a_stride]*b_data[2*b_stride] - a_data[2*a_stride]*b_data[1*b_stride]; - r__data[1*r__stride] = a_data[2*a_stride]*b_data[0*b_stride] - a_data[0*a_stride]*b_data[2*b_stride]; - r__data[2*r__stride] = a_data[0*a_stride]*b_data[1*b_stride] - a_data[1*a_stride]*b_data[0*b_stride];); -} - -void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src) { - THTensor_(resizeAs)(r, t); - TH_TENSOR_APPLY3(real, r, real, t, real, src, - *r_data = *t_data > *src_data ? *t_data : *src_data;); -} - -void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src) { - THTensor_(resizeAs)(r, t); - TH_TENSOR_APPLY3(real, r, real, t, real, src, - *r_data = *t_data < *src_data ? *t_data : *src_data;); -} - -void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value) { - THTensor_(resizeAs)(r, t); - TH_TENSOR_APPLY2(real, r, real, t, - *r_data = *t_data > value ? *t_data : value;); -} - -void THTensor_(cminValue)(THTensor *r, THTensor *t, real value) { - THTensor_(resizeAs)(r, t); - TH_TENSOR_APPLY2(real, r, real, t, - *r_data = *t_data < value ? 
*t_data : value;); -} - -void THTensor_(zeros)(THTensor *r_, THLongStorage *size) -{ - THTensor_(resize)(r_, size, NULL); - THTensor_(zero)(r_); -} - -void THTensor_(ones)(THTensor *r_, THLongStorage *size) -{ - THTensor_(resize)(r_, size, NULL); - THTensor_(fill)(r_, 1); -} - -void THTensor_(diag)(THTensor *r_, THTensor *t, int k) -{ - THArgCheck(THTensor_(nDimension)(t) == 1 || THTensor_(nDimension)(t) == 2, 1, "matrix or a vector expected"); - - if(THTensor_(nDimension)(t) == 1) - { - real *t_data = THTensor_(data)(t); - long t_stride_0 = THTensor_(stride)(t, 0); - long t_size = THTensor_(size)(t, 0); - long sz = t_size + (k >= 0 ? k : -k); - real *r__data; - long r__stride_0; - long r__stride_1; - long i; - - THTensor_(resize2d)(r_, sz, sz); - THTensor_(zero)(r_); - r__data = THTensor_(data)(r_); - r__stride_0 = THTensor_(stride)(r_, 0); - r__stride_1 = THTensor_(stride)(r_, 1); - r__data += (k >= 0 ? k*r__stride_1 : -k*r__stride_0); - - for(i = 0; i < t_size; i++) - r__data[i*(r__stride_0+r__stride_1)] = t_data[i*t_stride_0]; - } - else - { - real *t_data = THTensor_(data)(t); - long t_stride_0 = THTensor_(stride)(t, 0); - long t_stride_1 = THTensor_(stride)(t, 1); - long sz; - real *r__data; - long r__stride_0; - long i; - - if(k >= 0) - sz = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)-k); - else - sz = THMin(THTensor_(size)(t, 0)+k, THTensor_(size)(t, 1)); - THTensor_(resize1d)(r_, sz); - r__data = THTensor_(data)(r_); - r__stride_0 = THTensor_(stride)(r_, 0); - - t_data += (k >= 0 ? 
k*t_stride_1 : -k*t_stride_0); - for(i = 0; i < sz; i++) - r__data[i*r__stride_0] = t_data[i*(t_stride_0+t_stride_1)]; - } -} - -void THTensor_(eye)(THTensor *r_, long n, long m) -{ - real *r__data; - long i, sz; - - THArgCheck(n > 0, 1, "invalid argument"); - - if(m <= 0) - m = n; - - THTensor_(resize2d)(r_, n, m); - THTensor_(zero)(r_); - - i = 0; - r__data = THTensor_(data)(r_); - sz = THMin(THTensor_(size)(r_, 0), THTensor_(size)(r_, 1)); - for(i = 0; i < sz; i++) - r__data[i*(r_->stride[0]+r_->stride[1])] = 1; -} - - -void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step) -{ - ptrdiff_t size; - real i = 0; - - THArgCheck(step > 0 || step < 0, 3, "step must be a non-null number"); - THArgCheck(((step > 0) && (xmax >= xmin)) || ((step < 0) && (xmax <= xmin)) - , 2, "upper bound and larger bound incoherent with step sign"); - - size = (ptrdiff_t) (((xmax - xmin) / step) + 1); - - if (THTensor_(nElement)(r_) != size) { - THTensor_(resize1d)(r_, size); - } - - TH_TENSOR_APPLY(real, r_, *r__data = xmin + (i++)*step;); -} - -void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step) { -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - int m = fmod(xmax - xmin,step) == 0; -#else - int m = (xmax - xmin) % step == 0; -#endif - if (m) - xmax -= step; - THTensor_(range)(r_,xmin,xmax,step); -} - -void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n) -{ - real *r__data; - long r__stride_0; - long i; - - THArgCheck(n > 0, 1, "must be strictly positive"); - - THTensor_(resize1d)(r_, n); - r__data = THTensor_(data)(r_); - r__stride_0 = THTensor_(stride)(r_,0); - - for(i = 0; i < n; i++) - r__data[i*r__stride_0] = (real)(i); - - for(i = 0; i < n-1; i++) - { - long z = THRandom_random(_generator) % (n-i); - real sav = r__data[i*r__stride_0]; - r__data[i*r__stride_0] = r__data[(z+i)*r__stride_0]; - r__data[(z+i)*r__stride_0] = sav; - } -} - -void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage 
*size) -{ - THTensor_(resize)(r_, size, NULL); - THTensor_(copy)(r_, t); -} - -/* I cut and pasted (slightly adapted) the quicksort code from - Sedgewick's 1978 "Implementing Quicksort Programs" article - http://www.csie.ntu.edu.tw/~b93076/p847-sedgewick.pdf - - It is the state of the art existing implementation. The macros - are here to make as close a match as possible to the pseudocode of - Program 2 p.851 - - Note that other partition schemes exist, and are typically presented - in textbook, but those are less efficient. See e.g. - http://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto - - Julien, November 12th 2013 -*/ -#define MAX_LEVELS 300 -#define M_SMALL 10 /* Limit for small subfiles */ - -#define ARR(III) arr[(III)*stride] -#define IDX(III) idx[(III)*stride] - -#define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap -#define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap - -#define ARR_SWAP(III, JJJ) \ - REAL_SWAP(ARR(III), ARR(JJJ)); - -#define BOTH_SWAP(III, JJJ) \ - REAL_SWAP(ARR(III), ARR(JJJ)); \ - LONG_SWAP(IDX(III), IDX(JJJ)) - -static void THTensor_(quicksortascend)(real *arr, long *idx, long elements, long stride) -{ - long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left; - real rswap, piv; - unsigned char done = 0; - - /* beg[0]=0; end[0]=elements; */ - stack = 0; - L = 0; R = elements-1; - done = elements-1 <= M_SMALL; - - while(!done) { - /* Use median of three for pivot choice */ - P=(L+R)>>1; - BOTH_SWAP(P, L+1); - if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); } - if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); } - if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); } - - i = L+1; j = R; piv = ARR(L); pid = IDX(L); - - do { - do { i = i+1; } while(ARR(i) < piv); - do { j = j-1; } while(ARR(j) > piv); - if (j < i) - break; - BOTH_SWAP(i, j); - } while(1); - BOTH_SWAP(L, j); - /* Left subfile is (L, j-1) */ - /* Right subfile is (i, R) */ - sz_left = j-L; - sz_right = R-i+1; - 
if (sz_left <= M_SMALL && sz_right <= M_SMALL) { - /* both subfiles are small */ - /* if stack empty */ - if (stack == 0) { - done = 1; - } else { - stack--; - L = beg[stack]; - R = end[stack]; - } - } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) { - /* exactly one of the subfiles is small */ - /* (L,R) = large subfile */ - if (sz_left > sz_right) { - /* Implicit: L = L; */ - R = j-1; - } else { - L = i; - /* Implicit: R = R; */ - } - } else { - /* none of the subfiles is small */ - /* push large subfile */ - /* (L,R) = small subfile */ - if (sz_left > sz_right) { - beg[stack] = L; - end[stack] = j-1; - stack++; - L = i; - /* Implicit: R = R */ - } else { - beg[stack] = i; - end[stack] = R; - stack++; - /* Implicit: L = L; */ - R = j-1; - } - } - } /* while not done */ - /* Now insertion sort on the concatenation of subfiles */ - for(i=elements-2; i>=0; i--) { - if (ARR(i) > ARR(i+1)) { - piv = ARR(i); - pid = IDX(i); - j = i+1; - do { - ARR(j-1) = ARR(j); - IDX(j-1) = IDX(j); - j = j+1; - } while(j < elements && ARR(j) < piv); - ARR(j-1) = piv; - IDX(j-1) = pid; - } - } -} - -static void THTensor_(quicksortdescend)(real *arr, long *idx, long elements, long stride) -{ - long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left; - real rswap, piv; - unsigned char done = 0; - - /* beg[0]=0; end[0]=elements; */ - stack = 0; - L = 0; R = elements-1; - done = elements-1 <= M_SMALL; - - while(!done) { - /* Use median of three for pivot choice */ - P=(L+R)>>1; - BOTH_SWAP(P, L+1); - if (ARR(L+1) < ARR(R)) { BOTH_SWAP(L+1, R); } - if (ARR(L) < ARR(R)) { BOTH_SWAP(L, R); } - if (ARR(L+1) < ARR(L)) { BOTH_SWAP(L+1, L); } - - i = L+1; j = R; piv = ARR(L); pid = IDX(L); - - do { - do { i = i+1; } while(ARR(i) > piv); - do { j = j-1; } while(ARR(j) < piv); - if (j < i) - break; - BOTH_SWAP(i, j); - } while(1); - BOTH_SWAP(L, j); - /* Left subfile is (L, j-1) */ - /* Right subfile is (i, R) */ - sz_left = j-L; - sz_right = R-i+1; - if 
(sz_left <= M_SMALL && sz_right <= M_SMALL) { - /* both subfiles are small */ - /* if stack empty */ - if (stack == 0) { - done = 1; - } else { - stack--; - L = beg[stack]; - R = end[stack]; - } - } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) { - /* exactly one of the subfiles is small */ - /* (L,R) = large subfile */ - if (sz_left > sz_right) { - /* Implicit: L = L; */ - R = j-1; - } else { - L = i; - /* Implicit: R = R; */ - } - } else { - /* none of the subfiles is small */ - /* push large subfile */ - /* (L,R) = small subfile */ - if (sz_left > sz_right) { - beg[stack] = L; - end[stack] = j-1; - stack++; - L = i; - /* Implicit: R = R */ - } else { - beg[stack] = i; - end[stack] = R; - stack++; - /* Implicit: L = L; */ - R = j-1; - } - } - } /* while not done */ - /* Now insertion sort on the concatenation of subfiles */ - for(i=elements-2; i>=0; i--) { - if (ARR(i) < ARR(i+1)) { - piv = ARR(i); - pid = IDX(i); - j = i+1; - do { - ARR(j-1) = ARR(j); - IDX(j-1) = IDX(j); - j = j+1; - } while(j < elements && ARR(j) > piv); - ARR(j-1) = piv; - IDX(j-1) = pid; - } - } -} - -#undef MAX_LEVELS -#undef M_SMALL - -void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder) -{ - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "invalid dimension %d", - dimension + TH_INDEX_BASE); - - THTensor_(resizeAs)(rt_, t); - THTensor_(copy)(rt_, t); - - { - THLongStorage *size = THTensor_(newSizeOf)(t); - THLongTensor_resize(ri_, size, NULL); - THLongStorage_free(size); - } - - if(descendingOrder) - { - TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension, - long i; - for(i = 0; i < ri__size; i++) - ri__data[i*ri__stride] = i; - THTensor_(quicksortdescend)(rt__data, ri__data, rt__size, rt__stride);) - } - else - { - TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension, - long i; - for(i = 0; i < ri__size; i++) - ri__data[i*ri__stride] = i; - THTensor_(quicksortascend)(rt__data, ri__data, rt__size, 
rt__stride);) - } -} - -/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's -public domain implementation at http://ndevilla.free.fr/median/median/ -Adapted similarly to the above Quicksort algorithm. -This version does not produce indices along with values. */ -static void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride) -{ - long P, L, R, i, j, swap; - real rswap, piv; - L = 0; - R = elements-1; - - do { - if (R <= L) /* One element only */ - return; - - if (R == L+1) { /* Two elements only */ - if (ARR(L) > ARR(R)) { - ARR_SWAP(L, R); - } - return; - } - - /* Use median of three for pivot choice */ - P=(L+R)>>1; - ARR_SWAP(P, L+1); - if (ARR(L+1) > ARR(R)) { ARR_SWAP(L+1, R); } - if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); } - if (ARR(L+1) > ARR(L)) { ARR_SWAP(L+1, L); } - - i = L+1; - j = R; - piv = ARR(L); - do { - do i++; while(ARR(i) < piv); - do j--; while(ARR(j) > piv); - if (j < i) - break; - ARR_SWAP(i, j); - } while(1); - ARR_SWAP(L, j); - - /* Re-set active partition */ - if (j <= k) L=i; - if (j >= k) R=j-1; - } while(1); -} - -/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's -public domain implementation at http://ndevilla.free.fr/median/median/ -Adapted similarly to the above Quicksort algorithm. 
*/ -static void THTensor_(quickselect)(real *arr, long *idx, long k, long elements, long stride) -{ - long P, L, R, i, j, swap, pid; - real rswap, piv; - L = 0; - R = elements-1; - - do { - if (R <= L) /* One element only */ - return; - - if (R == L+1) { /* Two elements only */ - if (ARR(L) > ARR(R)) { - BOTH_SWAP(L, R); - } - return; - } - - /* Use median of three for pivot choice */ - P=(L+R)>>1; - BOTH_SWAP(P, L+1); - if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); } - if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); } - if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); } - - i = L+1; - j = R; - piv = ARR(L); - pid = IDX(L); - do { - do i++; while(ARR(i) < piv); - do j--; while(ARR(j) > piv); - if (j < i) - break; - BOTH_SWAP(i, j); - } while(1); - BOTH_SWAP(L, j); - - /* Re-set active partition */ - if (j <= k) L=i; - if (j >= k) R=j-1; - } while(1); -} - -#undef ARR -#undef IDX -#undef LONG_SWAP -#undef REAL_SWAP -#undef BOTH_SWAP - -void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) -{ - THLongStorage *dim; - THTensor *temp_; - THLongTensor *tempi_; - real *temp__data; - long *tempi__data; - long t_size_dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range"); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(values_, dim, NULL); - THLongTensor_resize(indices_, dim, NULL); - THLongStorage_free(dim); - - t_size_dim = THTensor_(size)(t, dimension); - - temp_ = THTensor_(new)(); - THTensor_(resize1d)(temp_, t_size_dim); - temp__data = THTensor_(data)(temp_); - - tempi_ = THLongTensor_new(); - THLongTensor_resize1d(tempi_, t_size_dim); - tempi__data = THLongTensor_data(tempi_); - - TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, - long i; - real mode = 0; - long modei = 0; - long temp_freq = 0; - long max_freq = 0; - for(i = 0; i < t_size_dim; i++) - temp__data[i] = t_data[i*t_stride]; - for(i = 0; i < t_size_dim; 
i++) - tempi__data[i] = i; - THTensor_(quicksortascend)(temp__data, tempi__data, t_size_dim, 1); - - for(i = 0; i < t_size_dim; i++) - { - temp_freq++; - if ((i == t_size_dim - 1) || (temp__data[i] != temp__data[i+1])) - { - if (temp_freq > max_freq) - { - mode = temp__data[i]; - modei = tempi__data[i]; - max_freq = temp_freq; - } - temp_freq = 0; - } - } - *values__data = mode; - *indices__data = modei;); - - THTensor_(free)(temp_); - THLongTensor_free(tempi_); - if (!keepdim) { - THTensor_(squeeze1d)(values_, values_, dimension); - THLongTensor_squeeze1d(indices_, indices_, dimension); - } -} - -void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim) -{ - THLongStorage *dim; - THTensor *temp_; - THLongTensor *tempi_; - real *temp__data; - long *tempi__data; - long t_size_dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range"); - THArgCheck(k > 0 && k <= t->size[dimension], 2, "selected index out of range"); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(values_, dim, NULL); - THLongTensor_resize(indices_, dim, NULL); - THLongStorage_free(dim); - - t_size_dim = THTensor_(size)(t, dimension); - - temp_ = THTensor_(new)(); - THTensor_(resize1d)(temp_, t_size_dim); - temp__data = THTensor_(data)(temp_); - - tempi_ = THLongTensor_new(); - THLongTensor_resize1d(tempi_, t_size_dim); - tempi__data = THLongTensor_data(tempi_); - - TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension, - long i; - for(i = 0; i < t_size_dim; i++) - temp__data[i] = t_data[i*t_stride]; - for(i = 0; i < t_size_dim; i++) - tempi__data[i] = i; - THTensor_(quickselect)(temp__data, tempi__data, k - 1, t_size_dim, 1); - *values__data = temp__data[k-1]; - *indices__data = tempi__data[k-1];); - - THTensor_(free)(temp_); - THLongTensor_free(tempi_); - if (!keepdim) { - THTensor_(squeeze1d)(values_, values_, dimension); - 
THLongTensor_squeeze1d(indices_, indices_, dimension); - } -} - -void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim) -{ - long t_size_dim, k; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "dimension out of range"); - - t_size_dim = THTensor_(size)(t, dimension); - k = (t_size_dim-1) >> 1; /* take middle or one-before-middle element */ - - THTensor_(kthvalue)(values_, indices_, t, k+1, dimension, keepdim); -} - -void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted) -{ - int numDims = THTensor_(nDimension)(t); - THArgCheck(dim >= 0 && dim < numDims, 3, "dim not in range"); - - long sliceSize = THTensor_(size)(t, dim); - THArgCheck(k > 0 && k <= sliceSize, 2, "k not in range for dimension"); - - THTensor *tmpResults = THTensor_(new)(); - THTensor_(resize1d)(tmpResults, sliceSize); - real *tmp__data = THTensor_(data)(tmpResults); - - THLongTensor *tmpIndices = THLongTensor_new(); - THLongTensor_resize1d(tmpIndices, sliceSize); - long *tmpi__data = THLongTensor_data(tmpIndices); - - THLongStorage *topKSize = THTensor_(newSizeOf)(t); - THLongStorage_set(topKSize, dim, k); - THTensor_(resize)(rt_, topKSize, NULL); - THLongTensor_resize(ri_, topKSize, NULL); - THLongStorage_free(topKSize); - - if (dir) { - /* k largest elements, descending order (optional: see sorted) */ - long K = sliceSize - k; - TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim, - long i; - for(i = 0; i < sliceSize; i++) - { - tmp__data[i] = t_data[i*t_stride]; - tmpi__data[i] = i; - } - if (K > 0) - THTensor_(quickselect)(tmp__data, tmpi__data, K - 1, sliceSize, 1); - if (sorted) - THTensor_(quicksortdescend)(tmp__data + K, tmpi__data + K, k, 1); - for(i = 0; i < k; i++) - { - rt__data[i*rt__stride] = tmp__data[i + K]; - ri__data[i*ri__stride] = tmpi__data[i + K]; - }) - } - else { - /* k smallest elements, ascending order (optional: see sorted) */ - 
TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim, - long i; - for(i = 0; i < sliceSize; i++) - { - tmp__data[i] = t_data[i*t_stride]; - tmpi__data[i] = i; - } - THTensor_(quickselect)(tmp__data, tmpi__data, k - 1, sliceSize, 1); - if (sorted) - THTensor_(quicksortascend)(tmp__data, tmpi__data, k - 1, 1); - for(i = 0; i < k; i++) - { - rt__data[i*rt__stride] = tmp__data[i]; - ri__data[i*ri__stride] = tmpi__data[i]; - }) - } - - THTensor_(free)(tmpResults); - THLongTensor_free(tmpIndices); -} - -void THTensor_(tril)(THTensor *r_, THTensor *t, long k) -{ - long t_size_0, t_size_1; - long t_stride_0, t_stride_1; - long r__stride_0, r__stride_1; - real *t_data, *r__data; - long r, c; - - THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix"); - - THTensor_(resizeAs)(r_, t); - - t_size_0 = THTensor_(size)(t, 0); - t_size_1 = THTensor_(size)(t, 1); - t_stride_0 = THTensor_(stride)(t, 0); - t_stride_1 = THTensor_(stride)(t, 1); - r__stride_0 = THTensor_(stride)(r_, 0); - r__stride_1 = THTensor_(stride)(r_, 1); - r__data = THTensor_(data)(r_); - t_data = THTensor_(data)(t); - - for(r = 0; r < t_size_0; r++) - { - long sz = THMin(r+k+1, t_size_1); - for(c = THMax(0, r+k+1); c < t_size_1; c++) - r__data[r*r__stride_0+c*r__stride_1] = 0; - for(c = 0; c < sz; c++) - r__data[r*r__stride_0+c*r__stride_1] = t_data[r*t_stride_0+c*t_stride_1]; - } -} - -void THTensor_(triu)(THTensor *r_, THTensor *t, long k) -{ - long t_size_0, t_size_1; - long t_stride_0, t_stride_1; - long r__stride_0, r__stride_1; - real *t_data, *r__data; - long r, c; - - THArgCheck(THTensor_(nDimension)(t) == 2, 1, "expected a matrix"); - - THTensor_(resizeAs)(r_, t); - - t_size_0 = THTensor_(size)(t, 0); - t_size_1 = THTensor_(size)(t, 1); - t_stride_0 = THTensor_(stride)(t, 0); - t_stride_1 = THTensor_(stride)(t, 1); - r__stride_0 = THTensor_(stride)(r_, 0); - r__stride_1 = THTensor_(stride)(r_, 1); - r__data = THTensor_(data)(r_); - t_data = THTensor_(data)(t); - - for(r = 0; r < t_size_0; 
r++) - { - long sz = THMin(r+k, t_size_1); - for(c = THMax(0, r+k); c < t_size_1; c++) - r__data[r*r__stride_0+c*r__stride_1] = t_data[r*t_stride_0+c*t_stride_1]; - for(c = 0; c < sz; c++) - r__data[r*r__stride_0+c*r__stride_1] = 0; - } -} - -void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension) -{ - THTensor* inputs[2]; - inputs[0] = ta; - inputs[1] = tb; - THTensor_(catArray)(r_, inputs, 2, dimension); -} - -void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension) -{ - THLongStorage *size; - int i, j; - long offset; - int maxDim = dimension + 1; - int allEmpty = 1; - int allContiguous = 1; - - // cat_dimension is the actual dimension we cat along - int cat_dimension = dimension; - - for (i = 0; i < numInputs; i++) - { - maxDim = THMax(maxDim, inputs[i]->nDimension); - } - - // When the user input dimension is -1 (i.e. -2 in C) - // Then we pick the maximum last dimension across all tensors. - if ( dimension + TH_INDEX_BASE == -1 ) - { - cat_dimension = maxDim?(maxDim-1):0; - } - - THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs); - THArgCheck(cat_dimension >= 0, 4, "invalid dimension %d", dimension + TH_INDEX_BASE); - - size = THLongStorage_newWithSize(maxDim); - - for(i = 0; i < maxDim; i++) - { - // dimSize is either the size of the dim if it exists, either 1 if #dim > 0, otherwise 0 - long dimSize = i < inputs[0]->nDimension ? inputs[0]->size[i] : THMin(inputs[0]->nDimension, 1); - if (i == cat_dimension) - { - for (j = 1; j < numInputs; j++) - { - // accumulate the size over the dimension we want to cat on. - // Empty tensors are allowed - dimSize += i < inputs[j]->nDimension ? inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1); - } - } - else - { - for (j = 1; j < numInputs; j++) - { - long sz = (i < inputs[j]->nDimension ? 
inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1)); - // If it's a dimension we're not catting on - // Then fail if sizes are different AND > 0 - if (dimSize != sz && dimSize && sz) - { - THLongStorage_free(size); - THError("inconsistent tensor sizes"); - } - else if(!dimSize) - { - dimSize = sz; - } - } - } - allEmpty = allEmpty && !dimSize; - size->data[i] = dimSize; - } - - // Initiate catting and resizing - // If at least one of the input is not empty - if (!allEmpty) - { - THTensor_(resize)(result, size, NULL); - - // Check contiguity of all inputs and result - for (i = 0; i < numInputs; i++) { - if(inputs[i]->nDimension) { - allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]); - } - } - allContiguous = allContiguous && THTensor_(isContiguous)(result); - - // First path is for contiguous inputs along dim 1 - // Second path for non-contiguous - if (cat_dimension == 0 && allContiguous) - { - real* result_data = result->storage->data + result->storageOffset; - offset = 0; - for (j = 0; j < numInputs; j++) - { - if (inputs[j]->nDimension) - { - THTensor* input0 = inputs[j]; - real* input0_data = input0->storage->data + input0->storageOffset; - long input0_size = THTensor_(nElement)(input0); - memcpy(result_data + offset, input0_data, input0_size*sizeof(real)); - offset += input0_size; - } - } - } - else - { - offset = 0; - for (j = 0; j < numInputs; j++) - { - if (inputs[j]->nDimension) - { - long dimSize = cat_dimension < inputs[j]->nDimension ? 
inputs[j]->size[cat_dimension] : 1; - THTensor *nt = THTensor_(newWithTensor)(result); - THTensor_(narrow)(nt, NULL, cat_dimension, offset, dimSize); - THTensor_(copy)(nt, inputs[j]); - THTensor_(free)(nt); - offset += dimSize; - } - } - } - } - THLongStorage_free(size); -} - -int THTensor_(equal)(THTensor *ta, THTensor* tb) -{ - int equal = 1; - if(!THTensor_(isSameSizeAs)(ta, tb)) - return 0; - - if (THTensor_(isContiguous)(ta) && THTensor_(isContiguous)(tb)) { - real *tap = THTensor_(data)(ta); - real *tbp = THTensor_(data)(tb); - ptrdiff_t sz = THTensor_(nElement)(ta); - ptrdiff_t i; - for (i=0; i<sz; ++i){ - if(tap[i] != tbp[i]) return 0; - } - } else { - // Short-circuit the apply function on inequality - TH_TENSOR_APPLY2(real, ta, real, tb, - if (equal && *ta_data != *tb_data) { - equal = 0; - TH_TENSOR_APPLY_hasFinished = 1; break; - }) - } - return equal; -} - -#define TENSOR_IMPLEMENT_LOGICAL(NAME,OP) \ - void THTensor_(NAME##Value)(THByteTensor *r_, THTensor* t, real value) \ - { \ - THByteTensor_resizeNd(r_, t->nDimension, t->size, NULL); \ - TH_TENSOR_APPLY2(unsigned char, r_, real, t, \ - *r__data = (*t_data OP value) ? 1 : 0;); \ - } \ - void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, real value) \ - { \ - THTensor_(resizeNd)(r_, t->nDimension, t->size, NULL); \ - TH_TENSOR_APPLY2(real, r_, real, t, \ - *r__data = (*t_data OP value) ? 1 : 0;); \ - } \ - void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \ - { \ - THByteTensor_resizeNd(r_, ta->nDimension, ta->size, NULL); \ - TH_TENSOR_APPLY3(unsigned char, r_, real, ta, real, tb, \ - *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \ - } \ - void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \ - { \ - THTensor_(resizeNd)(r_, ta->nDimension, ta->size, NULL); \ - TH_TENSOR_APPLY3(real, r_, real, ta, real, tb, \ - *r__data = (*ta_data OP *tb_data) ? 
1 : 0;); \ - } \ - - -TENSOR_IMPLEMENT_LOGICAL(lt,<) -TENSOR_IMPLEMENT_LOGICAL(gt,>) -TENSOR_IMPLEMENT_LOGICAL(le,<=) -TENSOR_IMPLEMENT_LOGICAL(ge,>=) -TENSOR_IMPLEMENT_LOGICAL(eq,==) -TENSOR_IMPLEMENT_LOGICAL(ne,!=) - -#define LAB_IMPLEMENT_BASIC_FUNCTION(NAME, CFUNC) \ - void THTensor_(NAME)(THTensor *r_, THTensor *t) \ - { \ - THTensor_(resizeAs)(r_, t); \ - TH_TENSOR_APPLY2(real, t, real, r_, *r__data = CFUNC(*t_data);); \ - } \ - -#define LAB_IMPLEMENT_BASIC_FUNCTION_VALUE(NAME, CFUNC) \ - void THTensor_(NAME)(THTensor *r_, THTensor *t, real value) \ - { \ - THTensor_(resizeAs)(r_, t); \ - TH_TENSOR_APPLY2(real, t, real, r_, *r__data = CFUNC(*t_data, value);); \ - } \ - -#if defined(TH_REAL_IS_LONG) -LAB_IMPLEMENT_BASIC_FUNCTION(abs,labs) -#endif /* long only part */ - -#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) -LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs) -#endif /* int only part */ - -#if defined(TH_REAL_IS_BYTE) - -#define TENSOR_IMPLEMENT_LOGICAL_SUM(NAME, OP, INIT_VALUE) \ - int THTensor_(NAME)(THTensor *tensor) \ - { \ - THArgCheck(tensor->nDimension > 0, 1, "empty Tensor"); \ - int sum = INIT_VALUE; \ - TH_TENSOR_APPLY(real, tensor, sum = sum OP *tensor_data;); \ - return sum; \ - } - -TENSOR_IMPLEMENT_LOGICAL_SUM(logicalall, &&, 1) -TENSOR_IMPLEMENT_LOGICAL_SUM(logicalany, ||, 0) - -#endif /* Byte only part */ - -/* floating point only now */ -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - -#if defined (TH_REAL_IS_FLOAT) -#define TH_MATH_NAME(fn) fn##f -#else -#define TH_MATH_NAME(fn) fn -#endif - -LAB_IMPLEMENT_BASIC_FUNCTION(log,TH_MATH_NAME(log)) -LAB_IMPLEMENT_BASIC_FUNCTION(lgamma,TH_MATH_NAME(lgamma)) -LAB_IMPLEMENT_BASIC_FUNCTION(log1p,TH_MATH_NAME(log1p)) -LAB_IMPLEMENT_BASIC_FUNCTION(sigmoid,TH_MATH_NAME(TH_sigmoid)) -LAB_IMPLEMENT_BASIC_FUNCTION(exp,TH_MATH_NAME(exp)) -LAB_IMPLEMENT_BASIC_FUNCTION(cos,TH_MATH_NAME(cos)) -LAB_IMPLEMENT_BASIC_FUNCTION(acos,TH_MATH_NAME(acos)) 
-LAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh)) -LAB_IMPLEMENT_BASIC_FUNCTION(sin,TH_MATH_NAME(sin)) -LAB_IMPLEMENT_BASIC_FUNCTION(asin,TH_MATH_NAME(asin)) -LAB_IMPLEMENT_BASIC_FUNCTION(sinh,TH_MATH_NAME(sinh)) -LAB_IMPLEMENT_BASIC_FUNCTION(tan,TH_MATH_NAME(tan)) -LAB_IMPLEMENT_BASIC_FUNCTION(atan,TH_MATH_NAME(atan)) -LAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh)) -LAB_IMPLEMENT_BASIC_FUNCTION_VALUE(pow,TH_MATH_NAME(pow)) -LAB_IMPLEMENT_BASIC_FUNCTION(sqrt,TH_MATH_NAME(sqrt)) -LAB_IMPLEMENT_BASIC_FUNCTION(rsqrt,TH_MATH_NAME(TH_rsqrt)) -LAB_IMPLEMENT_BASIC_FUNCTION(ceil,TH_MATH_NAME(ceil)) -LAB_IMPLEMENT_BASIC_FUNCTION(floor,TH_MATH_NAME(floor)) -LAB_IMPLEMENT_BASIC_FUNCTION(round,TH_MATH_NAME(round)) -LAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs)) -LAB_IMPLEMENT_BASIC_FUNCTION(trunc,TH_MATH_NAME(trunc)) -LAB_IMPLEMENT_BASIC_FUNCTION(frac,TH_MATH_NAME(TH_frac)) -LAB_IMPLEMENT_BASIC_FUNCTION(neg,-) -LAB_IMPLEMENT_BASIC_FUNCTION(cinv, TH_MATH_NAME(1.0) / ) - - -void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty) -{ - THTensor_(resizeAs)(r_, tx); - TH_TENSOR_APPLY3(real, r_, real, tx, real, ty, *r__data = TH_MATH_NAME(atan2)(*tx_data,*ty_data);); -} - -void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight) -{ - THArgCheck(THTensor_(nElement)(a) == THTensor_(nElement)(b), 2, "sizes do not match"); - THTensor_(resizeAs)(r_, a); - TH_TENSOR_APPLY3(real, r_, real, a, real, b, *r__data = TH_MATH_NAME(TH_lerp)(*a_data, *b_data, weight);); -} - -void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim) -{ - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "invalid dimension %d", - dimension + TH_INDEX_BASE); - - THTensor_(sum)(r_, t, dimension, keepdim); - THTensor_(div)(r_, r_, t->size[dimension]); -} - -void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, 
"invalid dimension %d", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(r_, dim, NULL); - THLongStorage_free(dim); - - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal sum = 0; - accreal sum2 = 0; - long i; - for(i = 0; i < t_size; i++) - { - real z = t_data[i*t_stride]; - sum += z; - sum2 += z*z; - } - - if(flag) - { - sum /= t_size; - sum2 /= t_size; - sum2 -= sum*sum; - sum2 = (sum2 < 0 ? 0 : sum2); - *r__data = (real)TH_MATH_NAME(sqrt)(sum2); - } - else - { - sum /= t_size; - sum2 /= t_size-1; - sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum; - sum2 = (sum2 < 0 ? 0 : sum2); - *r__data = (real)TH_MATH_NAME(sqrt)(sum2); - }); - - if (!keepdim) { - THTensor_(squeeze1d)(r_, r_, dimension); - } -} - -void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(r_, dim, NULL); - THLongStorage_free(dim); - - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal sum = 0; - accreal sum2 = 0; - long i; - for(i = 0; i < t_size; i++) - { - real z = t_data[i*t_stride]; - sum += z; - sum2 += z*z; - } - - if(flag) - { - sum /= t_size; - sum2 /= t_size; - sum2 -= sum*sum; - sum2 = (sum2 < 0 ? 0 : sum2); - *r__data = sum2; - } - else - { - sum /= t_size; - sum2 /= t_size-1; - sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum; - sum2 = (sum2 < 0 ? 
0 : sum2); - *r__data = (real)sum2; - }); - - if (!keepdim) { - THTensor_(squeeze1d)(r_, r_, dimension); - } -} - -void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim) -{ - THLongStorage *dim; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, "invalid dimension %d", - dimension + TH_INDEX_BASE); - - dim = THTensor_(newSizeOf)(t); - THLongStorage_set(dim, dimension, 1); - THTensor_(resize)(r_, dim, NULL); - THLongStorage_free(dim); - - if(value == 0) { - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal sum = 0; - long i; - for(i = 0; i < t_size; i++) - sum += t_data[i*t_stride] != 0.0; - *r__data = sum;) - } else { - TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension, - accreal sum = 0; - long i; - for(i = 0; i < t_size; i++) { - sum += TH_MATH_NAME(pow)( - TH_MATH_NAME(fabs)(t_data[i*t_stride]), value); - } - *r__data = TH_MATH_NAME(pow)(sum, 1.0/value);) - } - - if (!keepdim) { - THTensor_(squeeze1d)(r_, r_, dimension); - } -} - -accreal THTensor_(normall)(THTensor *tensor, real value) -{ - accreal sum = 0; - if(value == 0) { - TH_TENSOR_APPLY(real, tensor, sum += *tensor_data != 0.0;); - return sum; - } else if(value == 1) { - TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(fabs)(*tensor_data);); - return sum; - } else if(value == 2) { - TH_TENSOR_APPLY(real, tensor, accreal z = *tensor_data; sum += z*z;); - return sqrt(sum); - } else { - TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value);); - return TH_MATH_NAME(pow)(sum, 1.0/value); - } -} - -void THTensor_(renorm)(THTensor *res, THTensor *src, real value, int dimension, real maxnorm) -{ - int i; - THTensor *rowR, *rowS; - - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(src), 3, "invalid dimension %d", - dimension + TH_INDEX_BASE); - THArgCheck(value > 0, 2, "non-positive-norm not supported"); - THArgCheck(THTensor_(nDimension)(src) > 1, 1, "need at least 2 dimensions, got %d 
dimensions", - THTensor_(nDimension)(src)); - - rowR = THTensor_(new)(); - rowS = THTensor_(new)(); - - THTensor_(resizeAs)(res, src); - - for (i=0; i<src->size[dimension]; i++) - { - real norm = 0; - real new_norm; - - THTensor_(select)(rowS, src, dimension, i); - THTensor_(select)(rowR, res, dimension, i); - if (value == 1) { - TH_TENSOR_APPLY(real, rowS, norm += fabs(*rowS_data);); - } else if (value == 2) { - TH_TENSOR_APPLY(real, rowS, accreal z = *rowS_data; norm += z*z;); - } else { - TH_TENSOR_APPLY(real, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value);); - } - - norm = pow(norm, 1/value); - - if (norm > maxnorm) - { - new_norm = maxnorm / (norm + 1e-7); - - TH_TENSOR_APPLY2( - real, rowR, real, rowS, - *rowR_data = (*rowS_data) * new_norm; - ) - } - else - THTensor_(copy)(rowR, rowS); - } - - THTensor_(free)(rowR); - THTensor_(free)(rowS); -} - -accreal THTensor_(dist)(THTensor *tensor, THTensor *src, real value) -{ - real sum = 0; - TH_TENSOR_APPLY2(real, tensor, real, src, - sum += TH_MATH_NAME(pow)( - TH_MATH_NAME(fabs)(*tensor_data - *src_data), value);); - return TH_MATH_NAME(pow)(sum, 1.0/value); -} - -accreal THTensor_(meanall)(THTensor *tensor) -{ - THArgCheck(tensor->nDimension > 0, 1, "empty Tensor"); - return THTensor_(sumall)(tensor)/THTensor_(nElement)(tensor); -} - -accreal THTensor_(varall)(THTensor *tensor) -{ - accreal mean = THTensor_(meanall)(tensor); - accreal sum = 0; - TH_TENSOR_APPLY(real, tensor, sum += (*tensor_data - mean)*(*tensor_data - mean);); - sum /= (THTensor_(nElement)(tensor)-1); - return sum; -} - -accreal THTensor_(stdall)(THTensor *tensor) -{ - return sqrt(THTensor_(varall)(tensor)); -} - -void THTensor_(linspace)(THTensor *r_, real a, real b, long n) -{ - real i = 0; - - THArgCheck(n > 1 || (n == 1 && (a == b)), 3, "invalid number of points"); - - if (THTensor_(nElement)(r_) != n) { - THTensor_(resize1d)(r_, n); - } - - if(n == 1) { - TH_TENSOR_APPLY(real, r_, - *r__data = a; - i++; - ); - } 
else { - TH_TENSOR_APPLY(real, r_, - *r__data = a + i*(b-a)/((real)(n-1)); - i++; - ); - } -} - -void THTensor_(logspace)(THTensor *r_, real a, real b, long n) -{ - real i = 0; - - THArgCheck(n > 1 || (n == 1 && (a == b)), 3, "invalid number of points"); - - if (THTensor_(nElement)(r_) != n) { - THTensor_(resize1d)(r_, n); - } - - if(n == 1) { - TH_TENSOR_APPLY(real, r_, - *r__data = TH_MATH_NAME(pow)(10.0, a); - i++; - ); - } else { - TH_TENSOR_APPLY(real, r_, - *r__data = TH_MATH_NAME(pow)(10.0, a + i*(b-a)/((real)(n-1))); - i++; - ); - } -} - -void THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size) -{ - THTensor_(resize)(r_, size, NULL); - THTensor_(uniform)(r_, _generator, 0, 1); -} - -void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size) -{ - THTensor_(resize)(r_, size, NULL); - THTensor_(normal)(r_, _generator, 0, 1); -} - -void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue) -{ - real minval; - real maxval; - real *h_data; - - THTensor_(resize1d)(hist, nbins); - THTensor_(zero)(hist); - minval = minvalue; - maxval = maxvalue; - if (minval == maxval) - { - minval = THTensor_(minall)(tensor); - maxval = THTensor_(maxall)(tensor); - } - if (minval == maxval) - { - minval = minval - 1; - maxval = maxval + 1; - } - - h_data = THTensor_(data)(hist); - - TH_TENSOR_APPLY(real, tensor, - if (*tensor_data >= minval && *tensor_data <= maxval) { - const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins); - h_data[THMin(bin, nbins-1)] += 1; - } - ); -} - -void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue) -{ - THArgCheck(THTensor_(nDimension)(tensor) < 3, 2, "invalid dimension %d, the input must be a 2d tensor", THTensor_(nDimension)(tensor)); - - int dimension = 1; - THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(tensor), 2, "invalid dimension %d", - dimension + TH_INDEX_BASE); - - real minval; - 
real maxval; - real *h_data; - - THTensor_(resize2d)(hist, tensor->size[0], nbins); - THTensor_(zero)(hist); - - minval = minvalue; - maxval = maxvalue; - if (minval == maxval) - { - minval = THTensor_(minall)(tensor); - maxval = THTensor_(maxall)(tensor); - } - if (minval == maxval) - { - minval = minval - 1; - maxval = maxval + 1; - } - - TH_TENSOR_DIM_APPLY2(real, tensor, real, hist, dimension, long i; - for(i = 0; i < tensor_size; i++) - { - if(tensor_data[i*tensor_stride] >= minval && tensor_data[i*tensor_stride] <= maxval) { - const int bin = (int)((tensor_data[i*tensor_stride]-minval) / (maxval-minval) * nbins); - hist_data[THMin(bin, nbins-1)] += 1; - } - } - ); -} - -#undef TH_MATH_NAME -#endif /* floating point only part */ -#undef IS_NONZERO -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h deleted file mode 100644 index 17e54ccf6..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorMath.h +++ /dev/null @@ -1,198 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorMath.h" -#else - -TH_API void THTensor_(fill)(THTensor *r_, real value); -TH_API void THTensor_(zero)(THTensor *r_); - -TH_API void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value); -TH_API void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src); -TH_API void THTensor_(maskedSelect)(THTensor *tensor, THTensor* src, THByteTensor *mask); - -TH_API void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor); - -TH_API void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index); -TH_API void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); -TH_API void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); -TH_API void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val); - -TH_API void 
THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index); -TH_API void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); -TH_API void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src); -TH_API void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val); - -TH_API accreal THTensor_(dot)(THTensor *t, THTensor *src); - -TH_API real THTensor_(minall)(THTensor *t); -TH_API real THTensor_(maxall)(THTensor *t); -TH_API real THTensor_(medianall)(THTensor *t); -TH_API accreal THTensor_(sumall)(THTensor *t); -TH_API accreal THTensor_(prodall)(THTensor *t); - -TH_API void THTensor_(neg)(THTensor *self, THTensor *src); -TH_API void THTensor_(cinv)(THTensor *self, THTensor *src); - -TH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(sub)(THTensor *self, THTensor *src, real value); -TH_API void THTensor_(mul)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(div)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(lshift)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(rshift)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(fmod)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(remainder)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value); -TH_API void THTensor_(bitand)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(bitor)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(bitxor)(THTensor *r_, THTensor *t, real value); - -TH_API void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src); -TH_API void THTensor_(csub)(THTensor *self, THTensor *src1, real value, THTensor *src2); -TH_API void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void 
THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src); -TH_API void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src); - -TH_API void THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2); -TH_API void THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2); - -TH_API void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat, THTensor *vec); -TH_API void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat1, THTensor *mat2); -TH_API void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2); - -TH_API void THTensor_(addbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2); -TH_API void THTensor_(baddbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2); - -TH_API void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain); - -TH_API ptrdiff_t THTensor_(numel)(THTensor *t); -TH_API void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim); -TH_API void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); -TH_API void 
THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension); -TH_API void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension); -TH_API void THTensor_(sign)(THTensor *r_, THTensor *t); -TH_API accreal THTensor_(trace)(THTensor *t); -TH_API void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension); - -TH_API void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src); -TH_API void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src); -TH_API void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value); -TH_API void THTensor_(cminValue)(THTensor *r, THTensor *t, real value); - -TH_API void THTensor_(zeros)(THTensor *r_, THLongStorage *size); -TH_API void THTensor_(ones)(THTensor *r_, THLongStorage *size); -TH_API void THTensor_(diag)(THTensor *r_, THTensor *t, int k); -TH_API void THTensor_(eye)(THTensor *r_, long n, long m); -TH_API void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step); -TH_API void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step); -TH_API void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n); - -TH_API void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size); -TH_API void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder); -TH_API void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted); -TH_API void THTensor_(tril)(THTensor *r_, THTensor *t, long k); -TH_API void THTensor_(triu)(THTensor *r_, THTensor *t, long k); -TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension); -TH_API void THTensor_(catArray)(THTensor *result, THTensor 
**inputs, int numInputs, int dimension); - -TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb); - -TH_API void THTensor_(ltValue)(THByteTensor *r_, THTensor* t, real value); -TH_API void THTensor_(leValue)(THByteTensor *r_, THTensor* t, real value); -TH_API void THTensor_(gtValue)(THByteTensor *r_, THTensor* t, real value); -TH_API void THTensor_(geValue)(THByteTensor *r_, THTensor* t, real value); -TH_API void THTensor_(neValue)(THByteTensor *r_, THTensor* t, real value); -TH_API void THTensor_(eqValue)(THByteTensor *r_, THTensor* t, real value); - -TH_API void THTensor_(ltValueT)(THTensor *r_, THTensor* t, real value); -TH_API void THTensor_(leValueT)(THTensor *r_, THTensor* t, real value); -TH_API void THTensor_(gtValueT)(THTensor *r_, THTensor* t, real value); -TH_API void THTensor_(geValueT)(THTensor *r_, THTensor* t, real value); -TH_API void THTensor_(neValueT)(THTensor *r_, THTensor* t, real value); -TH_API void THTensor_(eqValueT)(THTensor *r_, THTensor* t, real value); - -TH_API void THTensor_(ltTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(leTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(gtTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(geTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(neTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(eqTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb); - -TH_API void THTensor_(ltTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(leTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(gtTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(geTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(neTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); -TH_API void THTensor_(eqTensorT)(THTensor *r_, THTensor *ta, THTensor *tb); - -#if defined(TH_REAL_IS_SHORT) || 
defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG) -TH_API void THTensor_(abs)(THTensor *r_, THTensor *t); -#endif - -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - -TH_API void THTensor_(sigmoid)(THTensor *r_, THTensor *t); -TH_API void THTensor_(log)(THTensor *r_, THTensor *t); -TH_API void THTensor_(lgamma)(THTensor *r_, THTensor *t); -TH_API void THTensor_(log1p)(THTensor *r_, THTensor *t); -TH_API void THTensor_(exp)(THTensor *r_, THTensor *t); -TH_API void THTensor_(cos)(THTensor *r_, THTensor *t); -TH_API void THTensor_(acos)(THTensor *r_, THTensor *t); -TH_API void THTensor_(cosh)(THTensor *r_, THTensor *t); -TH_API void THTensor_(sin)(THTensor *r_, THTensor *t); -TH_API void THTensor_(asin)(THTensor *r_, THTensor *t); -TH_API void THTensor_(sinh)(THTensor *r_, THTensor *t); -TH_API void THTensor_(tan)(THTensor *r_, THTensor *t); -TH_API void THTensor_(atan)(THTensor *r_, THTensor *t); -TH_API void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty); -TH_API void THTensor_(tanh)(THTensor *r_, THTensor *t); -TH_API void THTensor_(pow)(THTensor *r_, THTensor *t, real value); -TH_API void THTensor_(tpow)(THTensor *r_, real value, THTensor *t); -TH_API void THTensor_(sqrt)(THTensor *r_, THTensor *t); -TH_API void THTensor_(rsqrt)(THTensor *r_, THTensor *t); -TH_API void THTensor_(ceil)(THTensor *r_, THTensor *t); -TH_API void THTensor_(floor)(THTensor *r_, THTensor *t); -TH_API void THTensor_(round)(THTensor *r_, THTensor *t); -TH_API void THTensor_(abs)(THTensor *r_, THTensor *t); -TH_API void THTensor_(trunc)(THTensor *r_, THTensor *t); -TH_API void THTensor_(frac)(THTensor *r_, THTensor *t); -TH_API void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight); - -TH_API void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim); -TH_API void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int flag, int keepdim); -TH_API void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int flag, int 
keepdim); -TH_API void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim); -TH_API void THTensor_(renorm)(THTensor *r_, THTensor *t, real value, int dimension, real maxnorm); -TH_API accreal THTensor_(dist)(THTensor *a, THTensor *b, real value); -TH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue); -TH_API void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue); - -TH_API accreal THTensor_(meanall)(THTensor *self); -TH_API accreal THTensor_(varall)(THTensor *self); -TH_API accreal THTensor_(stdall)(THTensor *self); -TH_API accreal THTensor_(normall)(THTensor *t, real value); - -TH_API void THTensor_(linspace)(THTensor *r_, real a, real b, long n); -TH_API void THTensor_(logspace)(THTensor *r_, real a, real b, long n); -TH_API void THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size); -TH_API void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size); -#endif - -#if defined(TH_REAL_IS_BYTE) - -TH_API int THTensor_(logicalall)(THTensor *self); -TH_API int THTensor_(logicalany)(THTensor *self); - -#endif /* TH_REAL_IS_BYTE */ - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c b/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c deleted file mode 100644 index 514d3dd27..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.c +++ /dev/null @@ -1,250 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorRandom.c" -#else - -void THTensor_(random)(THTensor *self, THGenerator *_generator) -{ -#if defined(TH_REAL_IS_BYTE) - TH_TENSOR_APPLY(real, self, *self_data = (unsigned char)(THRandom_random(_generator) % (UCHAR_MAX+1));); -#elif defined(TH_REAL_IS_CHAR) - TH_TENSOR_APPLY(real, self, *self_data = (char)(THRandom_random(_generator) % (CHAR_MAX+1));); -#elif defined(TH_REAL_IS_SHORT) - TH_TENSOR_APPLY(real, self, *self_data = 
(short)(THRandom_random(_generator) % (SHRT_MAX+1));); -#elif defined(TH_REAL_IS_INT) - TH_TENSOR_APPLY(real, self, *self_data = (int)(THRandom_random(_generator) % (INT_MAX+1UL));); -#elif defined(TH_REAL_IS_LONG) - TH_TENSOR_APPLY(real, self, *self_data = (long)(THRandom_random(_generator) % (LONG_MAX+1UL));); -#elif defined(TH_REAL_IS_FLOAT) - TH_TENSOR_APPLY(real, self, *self_data = (float)(THRandom_random(_generator) % ((1UL << FLT_MANT_DIG)+1));); -#elif defined(TH_REAL_IS_DOUBLE) - TH_TENSOR_APPLY(real, self, *self_data = (double)(THRandom_random(_generator) % ((1ULL << DBL_MANT_DIG)+1));); -#else -#error "Unknown type" -#endif -} - -void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p) -{ - TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_geometric(_generator, p);); -} - -void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p) -{ - TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_bernoulli(_generator, p);); -} - -void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p) -{ - TH_TENSOR_APPLY2(real, self, float, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data);); -} - -void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p) -{ - TH_TENSOR_APPLY2(real, self, double, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data);); -} - -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) - -void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b) -{ - TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_uniform(_generator, a, b);); -} - -void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv) -{ - TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_normal(_generator, mean, stdv);); -} - -void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda) -{ - TH_TENSOR_APPLY(real, self, *self_data = 
(real)THRandom_exponential(_generator, lambda);); -} - -void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma) -{ - TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_cauchy(_generator, median, sigma);); -} - -void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv) -{ - TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_logNormal(_generator, mean, stdv);); -} - -void THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement) -{ - int start_dim = THTensor_(nDimension)(prob_dist); - long n_dist; - long n_categories; - THDoubleTensor* cum_dist; - int i,j,k; - - if (start_dim == 1) - { - THTensor_(resize2d)(prob_dist, 1, THTensor_(size)(prob_dist, 0)); - } - - n_dist = THTensor_(size)(prob_dist, 0); - n_categories = THTensor_(size)(prob_dist, 1); - - THArgCheck(n_sample > 0, 2, "cannot sample n_sample < 0 samples"); - - if (!with_replacement) - { - THArgCheck((!with_replacement) && (n_sample <= n_categories), 2, \ - "cannot sample n_sample > prob_dist:size(1) samples without replacement"); - } - - /* cumulative probability distribution vector */ - cum_dist = THDoubleTensor_newWithSize1d(n_categories); - - /* will contain multinomial samples (category indices to be returned) */ - THLongTensor_resize2d(self, n_dist , n_sample); - - for (i=0; i<n_dist; i++) - { - /* Get normalized cumulative distribution from prob distribution */ - double sum = 0; - for (j=0; j<n_categories; j++) - { - sum += THStorage_(get)( \ - prob_dist->storage, \ - prob_dist->storageOffset+i*prob_dist->stride[0]+j*prob_dist->stride[1] \ - ); - THDoubleStorage_set( - cum_dist->storage, \ - cum_dist->storageOffset+j*cum_dist->stride[0], \ - sum \ - ); - } - THArgCheckWithCleanup((sum > 0), THCleanup(THDoubleTensor_free(cum_dist);), 2, - "invalid multinomial distribution (sum of probabilities <= 0)"); - /* normalize cumulative probability distribution so 
that last val is 1 - i.e. doesn't assume original prob_dist row sums to one */ - if ( (sum > 0) || ( ( sum < 1.00001) && (sum > 0.99999) ) ) - { - for (j=0; j<n_categories; j++) - { - THDoubleTensor_data(cum_dist)[j*cum_dist->stride[0]] /= sum; - } - } - - for (j=0; j<n_sample; j++) - { - /* sample a probability mass from a uniform distribution */ - double uniform_sample = THRandom_uniform(_generator, 0, 1); - /* Do a binary search for the slot in which the prob falls - ie cum_dist[row][slot-1] < uniform_prob < cum_distr[row][slot] */ - int left_pointer = 0; - int right_pointer = n_categories; - int mid_pointer; - double cum_prob; - int sample_idx; - /* Make sure the last cumulative distribution bucket sums to 1 */ - THDoubleTensor_data(cum_dist)[(n_categories-1)*cum_dist->stride[0]] = 1; - - while(right_pointer - left_pointer > 0) - { - mid_pointer = left_pointer + (right_pointer - left_pointer) / 2; - cum_prob = THDoubleStorage_get( \ - cum_dist->storage, \ - cum_dist->storageOffset+mid_pointer*cum_dist->stride[0] \ - ); - if (cum_prob < uniform_sample) - { - left_pointer = mid_pointer + 1; - } - else - { - right_pointer = mid_pointer; - } - } - sample_idx = left_pointer; - - /* store in result tensor (will be incremented for lua compat by wrapper) */ - THLongStorage_set( \ - self->storage, \ - self->storageOffset+i*self->stride[0]+j*self->stride[1], \ - sample_idx \ - ); - - /* Once a sample is drawn, it cannot be drawn again. ie sample without replacement */ - if (!with_replacement) - { - /* update cumulative distribution so that sample cannot be drawn again */ - double diff; - double new_val = 0; - double sum; - - if (sample_idx != 0) - { - new_val = THDoubleStorage_get( \ - cum_dist->storage, \ - cum_dist->storageOffset+(sample_idx-1)*cum_dist->stride[0] \ - ); - } - /* marginal cumulative mass (i.e. 
original probability) of sample */ - diff = THDoubleStorage_get( \ - cum_dist->storage, \ - cum_dist->storageOffset+sample_idx*cum_dist->stride[0] \ - ) - new_val; - /* new sum of marginals is not one anymore... */ - sum = 1.0 - diff; - for (k=0; k<n_categories; k++) - { - new_val = THDoubleStorage_get( \ - cum_dist->storage, \ - cum_dist->storageOffset+k*cum_dist->stride[0] \ - ); - if (k >= sample_idx) - { - /* remove sampled probability mass from later cumulative probabilities */ - new_val -= diff; - } - /* make total marginals sum to one */ - new_val /= sum; - THDoubleStorage_set( \ - cum_dist->storage, \ - cum_dist->storageOffset+k*cum_dist->stride[0], \ - new_val \ - ); - } - } - } - } - - THDoubleTensor_free(cum_dist); - - if (start_dim == 1) - { - THLongTensor_resize1d(self, n_sample); - THTensor_(resize1d)(prob_dist, n_categories); - } -} - -#endif - -#if defined(TH_REAL_IS_BYTE) -void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self) -{ - static const size_t size = sizeof(THGenerator); - THGenerator *rng_state; - THTensor_(resize1d)(self, size); - THArgCheck(THTensor_(nElement)(self) == size, 1, "RNG state is wrong size"); - THArgCheck(THTensor_(isContiguous)(self), 1, "RNG state needs to be contiguous"); - rng_state = (THGenerator *)THTensor_(data)(self); - THGenerator_copy(rng_state, _generator); -} - -void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self) -{ - static const size_t size = sizeof(THGenerator); - THGenerator *rng_state; - THArgCheck(THTensor_(nElement)(self) == size, 1, "RNG state is wrong size"); - THArgCheck(THTensor_(isContiguous)(self), 1, "RNG state needs to be contiguous"); - rng_state = (THGenerator *)THTensor_(data)(self); - THArgCheck(THGenerator_isValid(rng_state), 1, "Invalid RNG state"); - THGenerator_copy(_generator, rng_state); -} -#endif - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h b/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h deleted file mode 
100644 index d20514242..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THTensorRandom.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THTensorRandom.h" -#else - -TH_API void THTensor_(random)(THTensor *self, THGenerator *_generator); -TH_API void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p); -TH_API void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p); -TH_API void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p); -TH_API void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p); - -#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) -TH_API void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b); -TH_API void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv); -TH_API void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda); -TH_API void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma); -TH_API void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv); -TH_API void THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement); -#endif - -#if defined(TH_REAL_IS_BYTE) -TH_API void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self); -TH_API void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self); -#endif - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THVector.h b/contrib/lua-torch/torch7/lib/TH/generic/THVector.h deleted file mode 100644 index 7d368541a..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THVector.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THVector.h" -#else - -TH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n); -TH_API 
void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n); -TH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n); -TH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n); -TH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n); -TH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n); -TH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n); -TH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n); - -/* Initialize the dispatch pointers */ -TH_API void THVector_(vectorDispatchInit)(void); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c b/contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c deleted file mode 100644 index 3388e0d9b..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDefault.c +++ /dev/null @@ -1,131 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THVectorDefault.c" -#else - -void THVector_(copy_DEFAULT)(real *x, const real *y, const ptrdiff_t n) { - ptrdiff_t i = 0; - - for(; i <n-4; i+=4) - { - x[i] = y[i]; - x[i+1] = y[i+1]; - x[i+2] = y[i+2]; - x[i+3] = y[i+3]; - } - - for(; i < n; i++) - x[i] = y[i]; -} - -void THVector_(fill_DEFAULT)(real *x, const real c, const ptrdiff_t n) { - ptrdiff_t i = 0; - - for(; i <n-4; i+=4) - { - x[i] = c; - x[i+1] = c; - x[i+2] = c; - x[i+3] = c; - } - - for(; i < n; i++) - x[i] = c; -} - -void THVector_(cadd_DEFAULT)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n) -{ - ptrdiff_t i = 0; - - for(; i<n-4; i+=4) - { - z[i] = x[i] + c * y[i]; - z[i+1] = x[i+1] + c * y[i+1]; - z[i+2] = x[i+2] + c * y[i+2]; - z[i+3] = x[i+3] + c * y[i+3]; - } - - for(; i<n; i++) - z[i] = x[i] + c * y[i]; -} - -void THVector_(adds_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) -{ - ptrdiff_t i = 0; - - for(; i<n-4; i+=4) - { - 
y[i] = x[i] + c; - y[i+1] = x[i+1] + c; - y[i+2] = x[i+2] + c; - y[i+3] = x[i+3] + c; - } - - for(; i<n; i++) - y[i] = x[i] + c; -} - -void THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n) -{ - ptrdiff_t i = 0; - - for(; i <n-4; i+=4) - { - z[i] = x[i] * y[i]; - z[i+1] = x[i+1] * y[i+1]; - z[i+2] = x[i+2] * y[i+2]; - z[i+3] = x[i+3] * y[i+3]; - } - - for(; i < n; i++) - z[i] = x[i] * y[i]; -} - -void THVector_(muls_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) -{ - ptrdiff_t i = 0; - - for(; i <n-4; i+=4) - { - y[i] = x[i] * c; - y[i+1] = x[i+1] * c; - y[i+2] = x[i+2] * c; - y[i+3] = x[i+3] * c; - } - - for(; i < n; i++) - y[i] = x[i] * c; -} - -void THVector_(cdiv_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n) -{ - ptrdiff_t i = 0; - - for(; i<n-4; i+=4) - { - z[i] = x[i] / y[i]; - z[i+1] = x[i+1] / y[i+1]; - z[i+2] = x[i+2] / y[i+2]; - z[i+3] = x[i+3] / y[i+3]; - } - - for(; i < n; i++) - z[i] = x[i] / y[i]; -} - -void THVector_(divs_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n) -{ - ptrdiff_t i = 0; - - for(; i<n-4; i+=4) - { - y[i] = x[i] / c; - y[i+1] = x[i+1] / c; - y[i+2] = x[i+2] / c; - y[i+3] = x[i+3] / c; - } - - for(; i < n; i++) - y[i] = x[i] / c; -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c b/contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c deleted file mode 100644 index 5b8885283..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/THVectorDispatch.c +++ /dev/null @@ -1,262 +0,0 @@ -#ifndef TH_GENERIC_FILE -#define TH_GENERIC_FILE "generic/THVectorDispatch.c" -#else - -/* For now there are only SIMD implementations for FLOAT and DOUBLE. - * Hopefully in the future this can be made totally generic (e.g, there are SIMD implementations - * for a lot of functions */ -/* Each function with multiple implementations has: - * 1. 
A DISPATCHPTR which will be initialized to point to the best available implementation for the host - * 2. A DISPATCHTABLE which holds pointers to each implementation of a function, and a value indicating - * which SIMD extension a given implementation uses - * 3. A dispatch stub, which is what is actually called by clients, that simply wraps the dispatch pointer. - */ - -static void (*THVector_(fill_DISPATCHPTR))(real *, const real, const ptrdiff_t) = &THVector_(fill_DEFAULT); -static FunctionDescription THVector_(fill_DISPATCHTABLE)[] = { - #if defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(fill_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(__PPC64__) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(fill_VSX), SIMDExtension_VSX), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(fill_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(fill_SSE), SIMDExtension_SSE), - #endif - #endif - FUNCTION_IMPL(THVector_(fill_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(fill)(real *x, const real c, const ptrdiff_t n) { - THVector_(fill_DISPATCHPTR)(x, c, n); -} - -static void (*THVector_(cadd_DISPATCHPTR))(real *, const real *, const real *, const real, const ptrdiff_t) = &THVector_(cadd_DEFAULT); -static FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = { - #if defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cadd_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(USE_AVX2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cadd_AVX2), SIMDExtension_AVX2), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || 
defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cadd_SSE), SIMDExtension_SSE), - #endif - #endif - - FUNCTION_IMPL(THVector_(cadd_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n) { - THVector_(cadd_DISPATCHPTR)(z, x, y, c, n); -} - -static void (*THVector_(adds_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(adds_DEFAULT); -static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = { - #if defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(adds_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(__PPC64__) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(adds_VSX), SIMDExtension_VSX), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(adds_SSE), SIMDExtension_SSE), - #endif - #endif - - FUNCTION_IMPL(THVector_(adds_DEFAULT), SIMDExtension_DEFAULT) -}; -// Dispatch stubs that just call the pointers -TH_API void THVector_(adds)(real *r_, const real *t, const real value, const ptrdiff_t n) { - THVector_(adds_DISPATCHPTR)(r_, t, value, n); -} - -static void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT); -static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = { - #if defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - 
FUNCTION_IMPL(THVector_(cmul_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cmul_SSE), SIMDExtension_SSE), - #endif - #endif - - FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) { - THVector_(cmul_DISPATCHPTR)(z, x, y, n); -} - -static void (*THVector_(muls_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(muls_DEFAULT); -static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = { - #if defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(muls_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(__PPC64__) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(muls_VSX), SIMDExtension_VSX), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(muls_SSE), SIMDExtension_SSE), - #endif - #endif - - FUNCTION_IMPL(THVector_(muls_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n) { - THVector_(muls_DISPATCHPTR)(y, x, c, n); -} - -static void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cdiv_DEFAULT); -static FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = { - #if 
defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cdiv_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(cdiv_SSE), SIMDExtension_SSE), - #endif - #endif - - FUNCTION_IMPL(THVector_(cdiv_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n) { - THVector_(cdiv_DISPATCHPTR)(z, x, y, n); -} - -static void (*THVector_(divs_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(divs_DEFAULT); -static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = { - #if defined(__NEON__) - #if defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(divs_NEON), SIMDExtension_NEON), - #endif - #endif - - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX), - #endif - #endif - - #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \ - || defined(USE_SSE4_1) || defined(USE_SSE4_2) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(divs_SSE), SIMDExtension_SSE), - #endif - #endif - - FUNCTION_IMPL(THVector_(divs_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) { - THVector_(divs_DISPATCHPTR)(y, x, c, n); -} - -static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT); -static FunctionDescription THVector_(copy_DISPATCHTABLE)[] = { - #if defined(USE_AVX) - #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT) - FUNCTION_IMPL(THVector_(copy_AVX), SIMDExtension_AVX), 
- #endif - #endif - - FUNCTION_IMPL(THVector_(copy_DEFAULT), SIMDExtension_DEFAULT) -}; -void THVector_(copy)(real *y, const real *x, const ptrdiff_t n) { - THVector_(copy_DISPATCHPTR)(y, x, n); -} - -/* This needs to be called in order to initialize the dispatch pointers at runtime. - * This function simply checks what SIMD extensions are available, and then walks the dispatch table - * to choose the best function. - * NOTE: As implemented, it will initialize the dispatch pointer to the first supported function. - * This means that in the dispatch tables, implementations supporting more recent extensions - * need to come first - */ -void THVector_(vectorDispatchInit)(void) -{ - uint32_t hostSimdExts = detectHostSIMDExtensions(); - INIT_DISPATCH_PTR(fill); - INIT_DISPATCH_PTR(cadd); - INIT_DISPATCH_PTR(adds); - INIT_DISPATCH_PTR(cmul); - INIT_DISPATCH_PTR(muls); - INIT_DISPATCH_PTR(cdiv); - INIT_DISPATCH_PTR(divs); - INIT_DISPATCH_PTR(copy); -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h b/contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h deleted file mode 100644 index 425b4b96e..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/simd/common_simd.h +++ /dev/null @@ -1,395 +0,0 @@ -#ifndef COMMON_SIMD_H -#define COMMON_SIMD_H - -/* Weights */ -#define LOAD_WEIGHT(q, simd_type, inst_var) _m ## simd_type ## inst_var(*(q)) - -#define DECLARE_WEIGHTS(simd_type) \ -__ ## simd_type weight0; \ -__ ## simd_type weight1; \ -__ ## simd_type weight2; \ -__ ## simd_type weight3; \ -__ ## simd_type weight4; - -#define LOAD_WEIGHTS(k, simd_type, inst_var) \ -weight0 = LOAD_WEIGHT(weight + 5 * 0 + k, simd_type, inst_var); \ -weight1 = LOAD_WEIGHT(weight + 5 * 1 + k, simd_type, inst_var); \ -weight2 = LOAD_WEIGHT(weight + 5 * 2 + k, simd_type, inst_var); \ -weight3 = LOAD_WEIGHT(weight + 5 * 3 + k, simd_type, inst_var); \ -weight4 = LOAD_WEIGHT(weight + 5 * 4 + k, simd_type, inst_var); - -/* Inputs declare */ -#define 
DECLARE_INPUT_0(i) \ -float* input0 = image + i; \ - -#define DECLARE_INPUT_1() \ -float* input1 = input0 + inputStride; \ -float* input2 = input1 + inputStride; \ -float* input3 = input2 + inputStride; \ -float* input4 = input3 + inputStride; - -#define DECLARE_INPUT_2() \ -DECLARE_INPUT_1() \ -float* input5 = input4 + inputStride; - -#define DECLARE_INPUT_4() \ -DECLARE_INPUT_2() \ -float* input6 = input5 + inputStride; \ -float* input7 = input6 + inputStride; - -#define DECLARE_INPUT_5() \ -DECLARE_INPUT_4() \ -float* input8 = input7 + inputStride; - -#define DECLARE_INPUT_6() \ -DECLARE_INPUT_5() \ -float* input9 = input8 + inputStride; - -#define DECLARE_INPUT_7() \ -DECLARE_INPUT_6() \ -float* inputA = input9 + inputStride; - -#define DECLARE_INPUT_8() \ -DECLARE_INPUT_7() \ -float* inputB = inputA + inputStride; - - -/* Inputs increment */ -#define INC_INPUT_1()\ -input0++; \ -input1++; \ -input2++; \ -input3++; \ -input4++; \ - -#define INC_INPUT_2()\ -INC_INPUT_1() \ -input5++; - -#define INC_INPUT_4()\ -INC_INPUT_2() \ -input6++; \ -input7++; - -#define INC_INPUT_5()\ -INC_INPUT_4() \ -input8++; - -#define INC_INPUT_6()\ -INC_INPUT_5() \ -input9++; - -#define INC_INPUT_7()\ -INC_INPUT_6() \ -inputA++; - -#define INC_INPUT_8()\ -INC_INPUT_7() \ -inputB++; - -/* Outputs declare */ -#define DECLARE_OUTPUT_1() \ -float* output0 = output; - -#define DECLARE_OUTPUT_2() \ -DECLARE_OUTPUT_1() \ -float* output1 = output0 + outputStride; - -#define DECLARE_OUTPUT_4() \ -DECLARE_OUTPUT_2() \ -float* output2 = output1 + outputStride; \ -float* output3 = output2 + outputStride; - -#define DECLARE_OUTPUT_5() \ -DECLARE_OUTPUT_4() \ -float* output4 = output3 + outputStride; - -#define DECLARE_OUTPUT_6() \ -DECLARE_OUTPUT_5() \ -float* output5 = output4 + outputStride; - -#define DECLARE_OUTPUT_7() \ -DECLARE_OUTPUT_6() \ -float* output6 = output5 + outputStride; - -#define DECLARE_OUTPUT_8() \ -DECLARE_OUTPUT_7() \ -float* output7 = output6 + outputStride; - -/* Outputs 
increment */ -#define INC_OUTPUT_1(x) \ -output0 += x; - -#define INC_OUTPUT_2(x) \ -INC_OUTPUT_1(x) \ -output1 += x; - -#define INC_OUTPUT_4(x) \ -INC_OUTPUT_2(x) \ -output2 += x; \ -output3 += x; - -#define INC_OUTPUT_5(x) \ -INC_OUTPUT_4(x) \ -output4 += x; - -#define INC_OUTPUT_6(x) \ -INC_OUTPUT_5(x) \ -output5 += x; - -#define INC_OUTPUT_7(x) \ -INC_OUTPUT_6(x) \ -output6 += x; - -#define INC_OUTPUT_8(x) \ -INC_OUTPUT_7(x) \ -output7 += x; - -/* Image declare */ -#define DECLARE_IMAGE_1(simd_type) \ -__ ## simd_type image0; \ -__ ## simd_type image1; \ -__ ## simd_type image2; \ -__ ## simd_type image3; \ -__ ## simd_type image4; - -#define DECLARE_IMAGE_2(simd_type) \ -DECLARE_IMAGE_1(simd_type) \ -__ ## simd_type image5; - -#define DECLARE_IMAGE_4(simd_type) \ -DECLARE_IMAGE_2(simd_type) \ -__ ## simd_type image6; \ -__ ## simd_type image7; - -#define DECLARE_IMAGE_5(simd_type) \ -DECLARE_IMAGE_4(simd_type) \ -__ ## simd_type image8; - -#define DECLARE_IMAGE_6(simd_type) \ -DECLARE_IMAGE_5(simd_type) \ -__ ## simd_type image9; - -#define DECLARE_IMAGE_7(simd_type) \ -DECLARE_IMAGE_6(simd_type) \ -__ ## simd_type imageA; - -#define DECLARE_IMAGE_8(simd_type) \ -DECLARE_IMAGE_7(simd_type) \ -__ ## simd_type imageB; - -/* Sums declare */ -#define DECLARE_SUM_1(simd_type) \ -__ ## simd_type sum0; - -#define DECLARE_SUM_2(simd_type) \ -DECLARE_SUM_1(simd_type) \ -__ ## simd_type sum1; - -#define DECLARE_SUM_4(simd_type) \ -DECLARE_SUM_2(simd_type) \ -__ ## simd_type sum2; \ -__ ## simd_type sum3; - -#define DECLARE_SUM_5(simd_type) \ -DECLARE_SUM_4(simd_type) \ -__ ## simd_type sum4; - -#define DECLARE_SUM_6(simd_type) \ -DECLARE_SUM_5(simd_type) \ -__ ## simd_type sum5; - -#define DECLARE_SUM_7(simd_type) \ -DECLARE_SUM_6(simd_type) \ -__ ## simd_type sum6; - -#define DECLARE_SUM_8(simd_type) \ -DECLARE_SUM_7(simd_type) \ -__ ## simd_type sum7; - -/* Sums load */ -#define LOAD_SUM_1(simd_type) \ -sum0 = _m ## simd_type ## _loadu_ps(output0); - -#define 
LOAD_SUM_2(simd_type) \ -LOAD_SUM_1(simd_type) \ -sum1 = _m ## simd_type ## _loadu_ps(output1); - -#define LOAD_SUM_4(simd_type) \ -LOAD_SUM_2(simd_type) \ -sum2 = _m ## simd_type ## _loadu_ps(output2); \ -sum3 = _m ## simd_type ## _loadu_ps(output3); - -#define LOAD_SUM_5(simd_type) \ -LOAD_SUM_4(simd_type) \ -sum4 = _m ## simd_type ## _loadu_ps(output4); - -#define LOAD_SUM_6(simd_type) \ -LOAD_SUM_5(simd_type) \ -sum5 = _m ## simd_type ## _loadu_ps(output5); - -#define LOAD_SUM_7(simd_type) \ -LOAD_SUM_6(simd_type) \ -sum6 = _m ## simd_type ## _loadu_ps(output6); - -#define LOAD_SUM_8(simd_type) \ -LOAD_SUM_7(simd_type) \ -sum7 = _m ## simd_type ## _loadu_ps(output7); - -/* Sums store */ -#define STORE_SUM_1(simd_type) \ -_m ## simd_type ## _storeu_ps(output0, sum0); - -#define STORE_SUM_2(simd_type) \ -STORE_SUM_1(simd_type) \ -_m ## simd_type ## _storeu_ps(output1, sum1); - -#define STORE_SUM_4(simd_type) \ -STORE_SUM_2(simd_type) \ -_m ## simd_type ## _storeu_ps(output2, sum2); \ -_m ## simd_type ## _storeu_ps(output3, sum3); - -#define STORE_SUM_5(simd_type) \ -STORE_SUM_4(simd_type) \ -_m ## simd_type ## _storeu_ps(output4, sum4); - -#define STORE_SUM_6(simd_type) \ -STORE_SUM_5(simd_type) \ -_m ## simd_type ## _storeu_ps(output5, sum5); - -#define STORE_SUM_7(simd_type) \ -STORE_SUM_6(simd_type) \ -_m ## simd_type ## _storeu_ps(output6, sum6); - -#define STORE_SUM_8(simd_type) \ -STORE_SUM_7(simd_type) \ -_m ## simd_type ## _storeu_ps(output7, sum7); - -/* Convolution */ -#define CONVOLVE_1ROWS(simd_type) \ -image0 = _m ## simd_type ## _loadu_ps(input0); \ -image1 = _m ## simd_type ## _loadu_ps(input1); \ -image2 = _m ## simd_type ## _loadu_ps(input2); \ -image3 = _m ## simd_type ## _loadu_ps(input3); \ -image4 = _m ## simd_type ## _loadu_ps(input4); \ -\ -sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight0, image0)); \ -sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight1, image1)); \ -sum0 = _m ## simd_type ## 
_add_ps(sum0, _m ## simd_type ## _mul_ps(weight2, image2)); \ -sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight3, image3)); \ -sum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight4, image4)); - -#define CONVOLVE_2ROWS(simd_type) \ -CONVOLVE_1ROWS(simd_type) \ -image5 = _m ## simd_type ## _loadu_ps(input5); \ -sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight0, image1)); \ -sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight1, image2)); \ -sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight2, image3)); \ -sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight3, image4)); \ -sum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight4, image5)); - -#define CONVOLVE_4ROWS(simd_type) \ -CONVOLVE_2ROWS(simd_type) \ -image6 = _m ## simd_type ## _loadu_ps(input6); \ -sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight0, image2)); \ -sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight1, image3)); \ -sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight2, image4)); \ -sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight3, image5)); \ -sum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight4, image6)); \ -\ -image7 = _m ## simd_type ## _loadu_ps(input7); \ -sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight0, image3)); \ -sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight1, image4)); \ -sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight2, image5)); \ -sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight3, image6)); \ -sum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight4, image7)); - -#define CONVOLVE_5ROWS(simd_type) \ -CONVOLVE_4ROWS(simd_type) \ -image8 = _m ## simd_type ## _loadu_ps(input8); \ -sum4 = _m ## simd_type 
## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight0, image4)); \ -sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight1, image5)); \ -sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight2, image6)); \ -sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight3, image7)); \ -sum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight4, image8)); - -#define CONVOLVE_6ROWS(simd_type) \ -CONVOLVE_5ROWS(simd_type) \ -image9 = _m ## simd_type ## _loadu_ps(input9); \ -sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight0, image5)); \ -sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight1, image6)); \ -sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight2, image7)); \ -sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight3, image8)); \ -sum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight4, image9)); - -#define CONVOLVE_7ROWS(simd_type) \ -CONVOLVE_6ROWS(simd_type) \ -imageA = _m ## simd_type ## _loadu_ps(inputA); \ -sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight0, image6)); \ -sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight1, image7)); \ -sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight2, image8)); \ -sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight3, image9)); \ -sum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight4, imageA)); - -#define CONVOLVE_8ROWS(simd_type) \ -CONVOLVE_7ROWS(simd_type) \ -imageB = _m ## simd_type ## _loadu_ps(inputB); \ -sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight0, image7)); \ -sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight1, image8)); \ -sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight2, image9)); \ -sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## 
_mul_ps(weight3, imageA)); \ -sum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight4, imageB)); - -/* Convolution MEGA macro */ -#define DECLARE_SUMX(rows) DECLARE_SUM_ ## rows -#define LOAD_SUMX(rows) LOAD_SUM_ ## rows -#define DECLARE_INPUTX(rows) DECLARE_INPUT_ ## rows -#define DECLARE_IMAGEX(rows) DECLARE_IMAGE_ ## rows -#define CONVOLVEX(rows) CONVOLVE_ ## rows ## ROWS -#define INC_INPUTX(rows) INC_INPUT_ ## rows -#define STORE_SUMX(rows) STORE_SUM_ ## rows -#define INC_OUTPUTX(rows) INC_OUTPUT_ ## rows - -#define CONVOLUTION_LOOP(rows, simd_type, simd_inst_prefex, simd_set, i) \ -DECLARE_SUMX(rows)(simd_type) \ -LOAD_SUMX(rows)(simd_inst_prefex) \ -DECLARE_WEIGHTS(simd_type) \ -DECLARE_INPUT_0(i) \ -DECLARE_INPUTX(rows)() \ -DECLARE_IMAGEX(rows)(simd_type) \ -\ -LOAD_WEIGHTS(0, simd_inst_prefex, simd_set) \ -CONVOLVEX(rows)(simd_inst_prefex) \ -INC_INPUTX(rows)() \ -\ -LOAD_WEIGHTS(1, simd_inst_prefex, simd_set) \ -CONVOLVEX(rows)(simd_inst_prefex) \ -INC_INPUTX(rows)() \ -\ -LOAD_WEIGHTS(2, simd_inst_prefex, simd_set) \ -CONVOLVEX(rows)(simd_inst_prefex) \ -INC_INPUTX(rows)() \ -\ -LOAD_WEIGHTS(3, simd_inst_prefex, simd_set) \ -CONVOLVEX(rows)(simd_inst_prefex) \ -INC_INPUTX(rows)() \ -\ -LOAD_WEIGHTS(4, simd_inst_prefex, simd_set) \ -CONVOLVEX(rows)(simd_inst_prefex) \ -\ -STORE_SUMX(rows)(simd_inst_prefex) \ -\ -INC_OUTPUTX(rows)(sizeof(__ ## simd_type) / sizeof(float)) - - -#define CONVOLVE_8COLS_XROWS(rows, i) \ -{ \ -CONVOLUTION_LOOP(rows, m256, m256, _set1_ps, i) \ -} - -#define CONVOLVE_4COLS_XROWS(rows, i) \ -{ \ -CONVOLUTION_LOOP(rows, m128, m, _set_ps1, i) \ -} - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c deleted file mode 100644 index da7a4bb20..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.c +++ /dev/null @@ -1,127 +0,0 @@ -#if defined(USE_AVX) && defined(__AVX__) - -#ifdef _MSC_VER -#include <intrin.h> - -static 
__inline int __get_cpuid (unsigned int __level, unsigned int *__eax, - unsigned int *__ebx, unsigned int *__ecx, - unsigned int *__edx) { - unsigned int cpui[4]; - __cpuid(cpui, __level); - *__eax = cpui[0]; *__ebx = cpui[1]; *__ecx = cpui[2]; *__edx = cpui[3]; - return 1; -} - -static void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) { - *eax = 0; *edx = 0; - if (op == 0) - *eax = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); -} - -#else - -#if __i386__ -#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ -__asm(" pushl %%ebx\n" \ -" cpuid\n" \ -" mov %%ebx,%1\n" \ -" popl %%ebx" \ -: "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ -: "0"(__level)) -#else -#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ -__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ -: "0"(__level)) -#endif - -static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, - unsigned int *__ebx, unsigned int *__ecx, - unsigned int *__edx) { - __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx); - return 1; -} - -static void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) { - __asm__ __volatile__ - (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); -} - -#endif - -enum ECPUFeature -{ - kCPUFeature_SSE = 0x01, - kCPUFeature_SSE2 = 0x02, - kCPUFeature_SSE3 = 0x04, - kCPUFeature_SSE3_S = 0x08, - kCPUFeature_SSE4_1 = 0x10, - kCPUFeature_SSE4_2 = 0x20, - kCPUFeature_AVX = 0x40 -}; - -static unsigned int checkCPUFeatures() { - unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; - unsigned int features = 0; - __get_cpuid(1, &eax, &ebx, &ecx, &edx); - if( (edx & (1 << 25)) != 0 ) { - features |= kCPUFeature_SSE; - } - if( (edx & (1 << 26)) != 0 ) { - features |= kCPUFeature_SSE2; - } - if( (ecx & (1 << 0)) != 0 ) { - features |= kCPUFeature_SSE3; - } - if( (ecx & (1 << 9)) != 0 ) { - features |= kCPUFeature_SSE3_S; - } - if( (ecx & (1 << 19)) != 0 ) { - features |= kCPUFeature_SSE4_1; - } - if( (ecx & (1 << 20)) != 0 ) 
{ - features |= kCPUFeature_SSE4_2; - } - if( (ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0 ) { - xgetbv(0, &eax, &edx); - if( (eax & 6) == 6 ) { - features |= kCPUFeature_AVX; - } - } - return features; -} - -#include <stdio.h> - -static int haveCPUFeature(unsigned int feature) { - static unsigned int sCPUFeatures = 0; - static int sDetectedCPUFeatures = 0; - if (!sDetectedCPUFeatures) { - sDetectedCPUFeatures = 1; - sCPUFeatures = checkCPUFeatures(); - if ((sCPUFeatures & kCPUFeature_AVX) != 0) { - printf("torch running avx\n"); - } else { - printf("torch running sse \n"); - } - } - return (sCPUFeatures & feature) != 0; -} - -#endif - -void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols); -void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols); - -void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) { -#if defined(USE_AVX) && defined(__AVX__) - int avx = haveCPUFeature(kCPUFeature_AVX); - if (avx) - { - convolve_5x5_avx(output, input, kernel, outRows, outCols, outCols, inCols); - } - else -#endif - { - convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols); - } -} diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h deleted file mode 100644 index 7b9b04c50..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve.h +++ /dev/null @@ -1 +0,0 @@ -void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols);
\ No newline at end of file diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c deleted file mode 100644 index 52b6d0ffb..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_avx.c +++ /dev/null @@ -1,212 +0,0 @@ -#include <immintrin.h> -#include "common_simd.h" - -#define CLEAR_AVX() _mm256_zeroupper() - -void convolve_5x5_1_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_1() - for (; i < alignedCount; i+=8) { - CONVOLVE_8COLS_XROWS(1, i) - } -} - -void convolve_5x5_2_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_2() - for (; i < alignedCount; i+=8) { - CONVOLVE_8COLS_XROWS(2, i) - } -} - -void convolve_5x5_4_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_4() - for (; i < alignedCount; i+=8) { - CONVOLVE_8COLS_XROWS(4, i) - } -} - -void convolve_5x5_5_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_5() - for (; i < alignedCount; i+=8) { - CONVOLVE_8COLS_XROWS(5, i) - } -} - -void convolve_5x5_6_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_6() - for (; i < alignedCount; i+=8) { - CONVOLVE_8COLS_XROWS(6, i) - } -} - -void convolve_5x5_7_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_7() - for (; i < alignedCount; i+=8) { - 
CONVOLVE_8COLS_XROWS(7, i) - } -} - -void convolve_5x5_8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount = count & 0xFFFFFFF8; - DECLARE_OUTPUT_8() - for (; i < alignedCount; i+=8) { - CONVOLVE_8COLS_XROWS(8, i) - } -} - -void convolve_5x5_64x64_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - for(int i = 0; i < 60; i+=6) - { - DECLARE_OUTPUT_6() - CONVOLVE_8COLS_XROWS(6, 0) - CONVOLVE_8COLS_XROWS(6, 8) - CONVOLVE_8COLS_XROWS(6, 16) - CONVOLVE_8COLS_XROWS(6, 24) - CONVOLVE_8COLS_XROWS(6, 32) - CONVOLVE_8COLS_XROWS(6, 40) - CONVOLVE_8COLS_XROWS(6, 48) - CONVOLVE_8COLS_XROWS(6, 56) - output += outputStride * 6; - image += inputStride * 6; - } - DECLARE_OUTPUT_4() - CONVOLVE_8COLS_XROWS(4, 0) - CONVOLVE_8COLS_XROWS(4, 8) - CONVOLVE_8COLS_XROWS(4, 16) - CONVOLVE_8COLS_XROWS(4, 24) - CONVOLVE_8COLS_XROWS(4, 32) - CONVOLVE_8COLS_XROWS(4, 40) - CONVOLVE_8COLS_XROWS(4, 48) - CONVOLVE_8COLS_XROWS(4, 56) -} - -void convolve_5x5_32x32_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - for(int i = 0; i < 30; i+=6) - { - DECLARE_OUTPUT_6() - CONVOLVE_8COLS_XROWS(6, 0) - CONVOLVE_8COLS_XROWS(6, 8) - CONVOLVE_8COLS_XROWS(6, 16) - CONVOLVE_8COLS_XROWS(6, 24) - output += outputStride * 6; - image += inputStride * 6; - } - DECLARE_OUTPUT_2() - CONVOLVE_8COLS_XROWS(2, 0) - CONVOLVE_8COLS_XROWS(2, 8) - CONVOLVE_8COLS_XROWS(2, 16) - CONVOLVE_8COLS_XROWS(2, 24) -} - -void convolve_5x5_16x16_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - for(int i = 0; i < 12; i+=6) - { - DECLARE_OUTPUT_6() - CONVOLVE_8COLS_XROWS(6, 0) - CONVOLVE_8COLS_XROWS(6, 8) - output += outputStride * 6; - image += inputStride * 6; - } - DECLARE_OUTPUT_4() - CONVOLVE_8COLS_XROWS(4, 0) - CONVOLVE_8COLS_XROWS(4, 8) -} - -void convolve_5x5_8x8_avx(float* output, float* image, 
float* weight, long count, long outputStride, long inputStride) { - DECLARE_OUTPUT_8() - CONVOLVE_8COLS_XROWS(8, 0) -} - -void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols); - -void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) { - long ic = inCols; - long yy = 0; - float* t_ = input; - float* r_ = output; - float* k_ = kernel; - - if((outRows == 64) && (outCols == 64)) { - convolve_5x5_64x64_avx(output, input, kernel, outRows, outStride, inCols); - return; - } - - if((outRows == 32) && (outCols == 32)) { - convolve_5x5_32x32_avx(output, input, kernel, outRows, outStride, inCols); - return; - } - - if((outRows == 16) && (outCols == 16)) { - convolve_5x5_16x16_avx(output, input, kernel, outRows, outStride, inCols); - return; - } - - if((outRows == 8) && (outCols == 8)) { - convolve_5x5_8x8_avx(output, input, kernel, outRows, outStride, inCols); - return; - } - - for(; yy < (outRows / 6 ) * 6; yy += 6) { - float *pi_ = t_ + yy*ic; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_6_avx(r_, pis_, pw_, outCols, outStride, ic); - r_ += (outStride * 6); - } - - // more than 2 rows left to process and we ended up on a non-multiple of 4 - if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) { - // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop) - float *pi_ = t_ + yy*ic; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic); - r_ += (outStride * 2); - yy += 2; - } - - for(; yy < (outRows & 0xFFFFFFFC); yy += 4) { - float *pi_ = t_ + yy*ic; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_4_avx(r_, pis_, pw_, outCols, outStride, ic); - r_ += (outStride * 4); - } - - for(; yy < (outRows & 0xFFFFFFFE); yy += 2) { - float *pi_ = t_ + yy*ic; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_2_avx(r_, pis_, pw_, 
outCols, outStride, ic); - r_ += (outStride * 2); - } - - for(; yy < outRows; yy += 1) { - float *pi_ = t_ + yy*ic; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_1_avx(r_, pis_, pw_, outCols, outStride, ic); - r_ += (outStride * 1); - } - - long procCols = outCols & 0xFFFFFFF8; // avx version processes 8 cols at a time - long remCols = outCols - procCols; - - //process the rest using sse - if( remCols > 0) { - CLEAR_AVX(); - convolve_5x5_sse(&output[procCols], &input[procCols], kernel, outRows, remCols, outStride, inCols); - } -}
\ No newline at end of file diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c b/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c deleted file mode 100644 index f34b79695..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/simd/convolve5x5_sse.c +++ /dev/null @@ -1,320 +0,0 @@ -#include <emmintrin.h> -#include "common_simd.h" - - -/* SSE variants */ -void convolve_5x5_1_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount4 = count & 0xFFFFFFFC; - DECLARE_OUTPUT_1() - for (; i < alignedCount4; i+=4) { - CONVOLVE_4COLS_XROWS(1, i) - } - for (; i < (count); i++) { - float output0 = output[i + outputStride * 0]; - int row; - for (row = 0; row < 5; row++) { - int col; - for (col = 0; col < 5; col++) { - output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; - } - } - output[i + outputStride * 0] = output0; - } -} - -void convolve_5x5_2_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount4 = count & 0xFFFFFFFC; - DECLARE_OUTPUT_2() - for (; i < alignedCount4; i+=4) { - CONVOLVE_4COLS_XROWS(2, i) - } - for (; i < (count); i++) { - float output0 = output[i + outputStride * 0]; - float output1 = output[i + outputStride * 1]; - int row; - for (row = 0; row < 5; row++) { - int col; - for (col = 0; col < 5; col++) { - output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; - output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; - } - } - output[i + outputStride * 0] = output0; - output[i + outputStride * 1] = output1; - } -} - -void convolve_5x5_4_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount4 = count & 0xFFFFFFFC; - DECLARE_OUTPUT_4() - for (; i < alignedCount4; i+=4) { - CONVOLVE_4COLS_XROWS(4, i) - } - for (; i < (count); 
i++) { - float output0 = output[i + outputStride * 0]; - float output1 = output[i + outputStride * 1]; - float output2 = output[i + outputStride * 2]; - float output3 = output[i + outputStride * 3]; - int row; - for (row = 0; row < 5; row++) { - int col; - for (col = 0; col < 5; col++) { - output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; - output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; - output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col]; - output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col]; - } - } - output[i + outputStride * 0] = output0; - output[i + outputStride * 1] = output1; - output[i + outputStride * 2] = output2; - output[i + outputStride * 3] = output3; - } -} - -void convolve_5x5_6_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount4 = count & 0xFFFFFFFC; - DECLARE_OUTPUT_6() - for (; i < alignedCount4; i+=4) { - CONVOLVE_4COLS_XROWS(6, i) - } - for (; i<(count); i++) { - float output0 = output[i + outputStride * 0]; - float output1 = output[i + outputStride * 1]; - float output2 = output[i + outputStride * 2]; - float output3 = output[i + outputStride * 3]; - float output4 = output[i + outputStride * 4]; - float output5 = output[i + outputStride * 5]; - int row; - for (row = 0; row < 5; row++) { - int col; - for (col = 0; col < 5; col++) { - output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; - output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; - output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col]; - output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col]; - output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col]; - output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col]; - } - } - output[i + outputStride * 0] = output0; - output[i + 
outputStride * 1] = output1; - output[i + outputStride * 2] = output2; - output[i + outputStride * 3] = output3; - output[i + outputStride * 4] = output4; - output[i + outputStride * 5] = output5; - } -} - -void convolve_5x5_8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - long i = 0; - long alignedCount4 = count & 0xFFFFFFFC; - DECLARE_OUTPUT_8() - for (; i < alignedCount4; i+=4) { - CONVOLVE_4COLS_XROWS(8, i) - } - for (; i<(count); i++) { - float output0 = output[i + outputStride * 0]; - float output1 = output[i + outputStride * 1]; - float output2 = output[i + outputStride * 2]; - float output3 = output[i + outputStride * 3]; - float output4 = output[i + outputStride * 4]; - float output5 = output[i + outputStride * 5]; - float output6 = output[i + outputStride * 6]; - float output7 = output[i + outputStride * 7]; - int row; - for (row = 0; row < 5; row++) { - int col; - for (col = 0; col < 5; col++) { - output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col]; - output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col]; - output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col]; - output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col]; - output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col]; - output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col]; - output6 += weight[5 * row + col] * image[i + (row + 6) * inputStride + col]; - output7 += weight[5 * row + col] * image[i + (row + 7) * inputStride + col]; - } - } - output[i + outputStride * 0] = output0; - output[i + outputStride * 1] = output1; - output[i + outputStride * 2] = output2; - output[i + outputStride * 3] = output3; - output[i + outputStride * 4] = output4; - output[i + outputStride * 5] = output5; - output[i + outputStride * 6] = output6; - output[i + outputStride * 7] = output7; - } -} - -#define UNROLL_SSE_CONVOLUTION 0 -#if 
(UNROLL_SSE_CONVOLUTION) - -void convolve_5x5_64x64_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - for(int i = 0; i < 60; i+=6) - { - DECLARE_OUTPUT_6() - CONVOLVE_4COLS_XROWS(6, 0) - CONVOLVE_4COLS_XROWS(6, 4) - CONVOLVE_4COLS_XROWS(6, 8) - CONVOLVE_4COLS_XROWS(6, 12) - CONVOLVE_4COLS_XROWS(6, 16) - CONVOLVE_4COLS_XROWS(6, 20) - CONVOLVE_4COLS_XROWS(6, 24) - CONVOLVE_4COLS_XROWS(6, 28) - CONVOLVE_4COLS_XROWS(6, 32) - CONVOLVE_4COLS_XROWS(6, 36) - CONVOLVE_4COLS_XROWS(6, 40) - CONVOLVE_4COLS_XROWS(6, 44) - CONVOLVE_4COLS_XROWS(6, 48) - CONVOLVE_4COLS_XROWS(6, 52) - CONVOLVE_4COLS_XROWS(6, 56) - CONVOLVE_4COLS_XROWS(6, 60) - output += outputStride * 6; - image += inputStride * 6; - } - DECLARE_OUTPUT_4() - CONVOLVE_4COLS_XROWS(4, 0) - CONVOLVE_4COLS_XROWS(4, 4) - CONVOLVE_4COLS_XROWS(4, 8) - CONVOLVE_4COLS_XROWS(4, 12) - CONVOLVE_4COLS_XROWS(4, 16) - CONVOLVE_4COLS_XROWS(4, 20) - CONVOLVE_4COLS_XROWS(4, 24) - CONVOLVE_4COLS_XROWS(4, 28) - CONVOLVE_4COLS_XROWS(4, 32) - CONVOLVE_4COLS_XROWS(4, 36) - CONVOLVE_4COLS_XROWS(4, 40) - CONVOLVE_4COLS_XROWS(4, 44) - CONVOLVE_4COLS_XROWS(4, 48) - CONVOLVE_4COLS_XROWS(4, 52) - CONVOLVE_4COLS_XROWS(4, 56) - CONVOLVE_4COLS_XROWS(4, 60) -} - -void convolve_5x5_32x32_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - for(int i = 0; i < 30; i+=6) - { - DECLARE_OUTPUT_6() - - CONVOLVE_4COLS_XROWS(6, 0) - CONVOLVE_4COLS_XROWS(6, 4) - CONVOLVE_4COLS_XROWS(6, 8) - CONVOLVE_4COLS_XROWS(6, 12) - CONVOLVE_4COLS_XROWS(6, 16) - CONVOLVE_4COLS_XROWS(6, 20) - CONVOLVE_4COLS_XROWS(6, 24) - CONVOLVE_4COLS_XROWS(6, 28) - - output += outputStride * 6; - image += inputStride * 6; - } - DECLARE_OUTPUT_2() - CONVOLVE_4COLS_XROWS(2, 0) - CONVOLVE_4COLS_XROWS(2, 4) - CONVOLVE_4COLS_XROWS(2, 8) - CONVOLVE_4COLS_XROWS(2, 12) - CONVOLVE_4COLS_XROWS(2, 16) - CONVOLVE_4COLS_XROWS(2, 20) - CONVOLVE_4COLS_XROWS(2, 24) - CONVOLVE_4COLS_XROWS(2, 28) -} - 
-void convolve_5x5_16x16_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - for(int i = 0; i < 12; i+=6) - { - DECLARE_OUTPUT_6() - CONVOLVE_4COLS_XROWS(6, 0) - CONVOLVE_4COLS_XROWS(6, 4) - CONVOLVE_4COLS_XROWS(6, 8) - CONVOLVE_4COLS_XROWS(6, 12) - output += outputStride * 6; - image += inputStride * 6; - } - DECLARE_OUTPUT_4() - CONVOLVE_4COLS_XROWS(4, 0) - CONVOLVE_4COLS_XROWS(4, 4) - CONVOLVE_4COLS_XROWS(4, 8) - CONVOLVE_4COLS_XROWS(4, 12) -} - -void convolve_5x5_8x8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) { - DECLARE_OUTPUT_8() - CONVOLVE_4COLS_XROWS(8, 0) - CONVOLVE_4COLS_XROWS(8, 4) -} - -#endif - -void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) { - long yy = 0; - float* t_ = input; - float* r_ = output; - float* k_ = kernel; -#if (UNROLL_SSE_CONVOLUTION) - if((outRows == 64) && (outCols == 64)) { - convolve_5x5_64x64_sse(output, input, kernel, outRows, outStride, inCols); - return; - } - - if((outRows == 32) && (outCols == 32)) { - convolve_5x5_32x32_sse(output, input, kernel, outRows, outStride, inCols); - return; - } - - if((outRows == 16) && (outCols == 16)) { - convolve_5x5_16x16_sse(output, input, kernel, outRows, outStride, inCols); - return; - } - - if((outRows == 8) && (outCols == 8)) { - convolve_5x5_8x8_sse(output, input, kernel, outRows, outStride, inCols); - return; - } -#endif - for(; yy < (outRows / 6 ) * 6; yy += 6) { - float *pi_ = t_ + yy*inCols; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_6_sse(r_, pis_, pw_, outCols, outStride, inCols); - r_ += (outStride * 6); - } - // more than 2 rows left to process and we ended up on a non-multiple of 4 - if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) { - // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop) - float *pi_ = t_ + yy*inCols; - float 
*pw_ = k_; - float *pis_ = pi_; - convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols); - r_ += (outStride * 2); - yy += 2; - } - - for(; yy < (outRows & 0xFFFFFFFC); yy += 4) { - float *pi_ = t_ + yy*inCols; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_4_sse(r_, pis_, pw_, outCols, outStride, inCols); - r_ += (outStride * 4); - } - - for(; yy < (outRows & 0xFFFFFFFE); yy += 2) { - float *pi_ = t_ + yy*inCols; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols); - r_ += (outStride * 2); - } - - for(; yy < outRows; yy += 1) { - float *pi_ = t_ + yy*inCols; - float *pw_ = k_; - float *pis_ = pi_; - convolve_5x5_1_sse(r_, pis_, pw_, outCols, outStride, inCols); - r_ += (outStride * 1); - } -} diff --git a/contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h b/contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h deleted file mode 100644 index b1878ad5b..000000000 --- a/contrib/lua-torch/torch7/lib/TH/generic/simd/simd.h +++ /dev/null @@ -1,165 +0,0 @@ -#ifndef TH_SIMD_INC -#define TH_SIMD_INC - -#include <stdint.h> -#include <stdlib.h> -#if defined(_MSC_VER) -#include <intrin.h> -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) -#include <cpuid.h> -#endif - -// Can be found on Intel ISA Reference for CPUID -#define CPUID_AVX2_BIT 0x20 // Bit 5 of EBX for EAX=0x7 -#define CPUID_AVX_BIT 0x10000000 // Bit 28 of ECX for EAX=0x1 -#define CPUID_SSE_BIT 0x2000000 // bit 25 of EDX for EAX=0x1 - -// Helper macros for initialization -#define FUNCTION_IMPL(NAME, EXT) \ - { .function=(void *)NAME, \ - .supportedSimdExt=EXT \ - } - -#define INIT_DISPATCH_PTR(OP) \ - do { \ - int i; \ - for (i = 0; i < sizeof(THVector_(OP ## _DISPATCHTABLE)) / sizeof(FunctionDescription); ++i) { \ - THVector_(OP ## _DISPATCHPTR) = THVector_(OP ## _DISPATCHTABLE)[i].function; \ - if (THVector_(OP ## _DISPATCHTABLE)[i].supportedSimdExt & hostSimdExts) { \ - break; \ - } \ - } \ - } while(0) - - -typedef struct 
FunctionDescription -{ - void *function; - uint32_t supportedSimdExt; -} FunctionDescription; - - -enum SIMDExtensions -{ -#if defined(__NEON__) - SIMDExtension_NEON = 0x1, -#elif defined(__PPC64__) - SIMDExtension_VSX = 0x1, -#else - SIMDExtension_AVX2 = 0x1, - SIMDExtension_AVX = 0x2, - SIMDExtension_SSE = 0x4, -#endif - SIMDExtension_DEFAULT = 0x0 -}; - - -#if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64 - - #if defined(__NEON__) - -static inline uint32_t detectHostSIMDExtensions() -{ - return SIMDExtension_NEON; -} - - #else //ARM without NEON - -static inline uint32_t detectHostSIMDExtensions() -{ - return SIMDExtension_DEFAULT; -} - - #endif - -#elif defined(__PPC64__) - - #if defined(__VSX__) - -static inline uint32_t detectHostSIMDExtensions() -{ - uint32_t hostSimdExts = SIMDExtension_DEFAULT; - char *evar; - - evar = getenv("TH_NO_VSX"); - if (evar == NULL || strncmp(evar, "1", 2) != 0) - hostSimdExts = SIMDExtension_VSX; - return hostSimdExts; -} - - #else //PPC64 without VSX - -static inline uint32_t detectHostSIMDExtensions() -{ - return SIMDExtension_DEFAULT; -} - - #endif - -#else // x86 -static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) -{ -#if defined(_MSC_VER) - uint32_t cpuInfo[4]; - __cpuid(cpuInfo, *eax); - *eax = cpuInfo[0]; - *ebx = cpuInfo[1]; - *ecx = cpuInfo[2]; - *edx = cpuInfo[3]; -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - uint32_t level = *eax; - __get_cpuid (level, eax, ebx, ecx, edx); -#else - uint32_t a = *eax, b, c = *ecx, d; - __asm volatile ( "cpuid\n\t" - : "+a"(a), "=b"(b), "+c"(c), "=d"(d) ); - *eax = a; - *ebx = b; - *ecx = c; - *edx = d; -#endif -} - -static inline uint32_t detectHostSIMDExtensions() -{ - uint32_t eax, ebx, ecx, edx; - uint32_t hostSimdExts = 0x0; - int TH_NO_AVX = 1, TH_NO_AVX2 = 1, TH_NO_SSE = 1; - char *evar; - - evar = getenv("TH_NO_AVX2"); - if (evar == NULL || strncmp(evar, "1", 2) != 0) - TH_NO_AVX2 = 0; - - // Check 
for AVX2. Requires separate CPUID - eax = 0x7; - ecx = 0x0; - cpuid(&eax, &ebx, &ecx, &edx); - if ((ebx & CPUID_AVX2_BIT) && TH_NO_AVX2 == 0) { - hostSimdExts |= SIMDExtension_AVX2; - } - - // Detect and enable AVX and SSE - eax = 0x1; - cpuid(&eax, &ebx, &ecx, &edx); - - evar = getenv("TH_NO_AVX"); - if (evar == NULL || strncmp(evar, "1", 2) != 0) - TH_NO_AVX = 0; - if (ecx & CPUID_AVX_BIT && TH_NO_AVX == 0) { - hostSimdExts |= SIMDExtension_AVX; - } - - evar = getenv("TH_NO_SSE"); - if (evar == NULL || strncmp(evar, "1", 2) != 0) - TH_NO_SSE = 0; - if (edx & CPUID_SSE_BIT && TH_NO_SSE == 0) { - hostSimdExts |= SIMDExtension_SSE; - } - - return hostSimdExts; -} - -#endif // end SIMD extension detection code - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX.c b/contrib/lua-torch/torch7/lib/TH/vector/AVX.c deleted file mode 100644 index 58c4e6d35..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/AVX.c +++ /dev/null @@ -1,274 +0,0 @@ -#if defined(USE_AVX) && defined(__AVX__) -#ifndef _MSC_VER -#include <x86intrin.h> -#else -#include <intrin.h> -#endif - -#include "AVX.h" - -void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) { - ptrdiff_t i; - ptrdiff_t off; - for (i=0; i<=((n)-8); i+=8) { - _mm256_storeu_pd(y+i, _mm256_loadu_pd(x+i)); - _mm256_storeu_pd(y+i+4, _mm256_loadu_pd(x+i+4)); - } - off = (n) - ((n)%8); - for (i=0; i<((n)%8); i++) { - y[off+i] = x[off+i]; - } -} - -void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - ptrdiff_t off; - __m256d YMM0 = _mm256_set_pd(c, c, c, c); - for (i=0; i<=((n)-16); i+=16) { - _mm256_storeu_pd((x)+i , YMM0); - _mm256_storeu_pd((x)+i+4, YMM0); - _mm256_storeu_pd((x)+i+8, YMM0); - _mm256_storeu_pd((x)+i+12, YMM0); - } - off = (n) - ((n)%16); - for (i=0; i<((n)%16); i++) { - x[off+i] = c; - } -} - -void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM0, YMM1, YMM2, YMM3; - 
for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_pd(x+i); - YMM1 = _mm256_loadu_pd(x+i+4); - YMM2 = _mm256_loadu_pd(y+i); - YMM3 = _mm256_loadu_pd(y+i+4); - YMM2 = _mm256_div_pd(YMM0, YMM2); - YMM3 = _mm256_div_pd(YMM1, YMM3); - _mm256_storeu_pd(z+i, YMM2); - _mm256_storeu_pd(z+i+4, YMM3); - } - for (; i<(n); i++) { - z[i] = x[i] / y[i]; - } -} - -void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM15 = _mm256_set_pd(c, c, c, c); - __m256d YMM0, YMM1; - for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_pd(x+i); - YMM1 = _mm256_loadu_pd(x+i+4); - YMM0 = _mm256_div_pd(YMM0, YMM15); - YMM1 = _mm256_div_pd(YMM1, YMM15); - _mm256_storeu_pd(y+i, YMM0); - _mm256_storeu_pd(y+i+4, YMM1); - } - for (; i<(n); i++) { - y[i] = x[i] / c; - } -} - -void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM0, YMM1, YMM2, YMM3; - for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_pd(x+i); - YMM1 = _mm256_loadu_pd(x+i+4); - YMM2 = _mm256_loadu_pd(y+i); - YMM3 = _mm256_loadu_pd(y+i+4); - YMM2 = _mm256_mul_pd(YMM0, YMM2); - YMM3 = _mm256_mul_pd(YMM1, YMM3); - _mm256_storeu_pd(z+i, YMM2); - _mm256_storeu_pd(z+i+4, YMM3); - } - for (; i<n; i++) { - z[i] = x[i] * y[i]; - } -} - -void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM15 = _mm256_set_pd(c, c, c, c); - __m256d YMM0, YMM1; - for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_pd(x+i); - YMM1 = _mm256_loadu_pd(x+i+4); - YMM0 = _mm256_mul_pd(YMM0, YMM15); - YMM1 = _mm256_mul_pd(YMM1, YMM15); - _mm256_storeu_pd(y+i, YMM0); - _mm256_storeu_pd(y+i+4, YMM1); - } - for (; i<n; i++) { - y[i] = x[i] * c; - } -} - -void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM15 = _mm256_set_pd(c, c, c, c); - __m256d YMM0, YMM1, YMM2, YMM3; - for (i=0; 
i<=((n)-4); i+=4) { - YMM0 = _mm256_loadu_pd(y+i); - YMM1 = _mm256_loadu_pd(x+i); - YMM2 = _mm256_mul_pd(YMM0, YMM15); - YMM3 = _mm256_add_pd(YMM1, YMM2); - _mm256_storeu_pd(z+i, YMM3); - } - for (; i<(n); i++) { - z[i] = x[i] + y[i] * c; - } -} - -void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM15 = _mm256_set_pd(c, c, c, c); - __m256d YMM0, YMM1; - for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_pd(x+i); - YMM1 = _mm256_loadu_pd(x+i+4); - YMM0 = _mm256_add_pd(YMM0, YMM15); - YMM1 = _mm256_add_pd(YMM1, YMM15); - _mm256_storeu_pd(y+i, YMM0); - _mm256_storeu_pd(y+i+4, YMM1); - } - for (; i<(n); i++) { - y[i] = x[i] + c; - } -} - -void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) { - ptrdiff_t i; - ptrdiff_t off; - for (i=0; i<=((n)-16); i+=16) { - _mm256_storeu_ps(y+i, _mm256_loadu_ps(x+i)); - _mm256_storeu_ps(y+i+8, _mm256_loadu_ps(x+i+8)); - } - off = (n) - ((n)%16); - for (i=0; i<((n)%16); i++) { - y[off+i] = x[off+i]; - } -} - -void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - ptrdiff_t off; - __m256 YMM0 = _mm256_set_ps(c, c, c, c, c, c, c, c); - for (i=0; i<=((n)-32); i+=32) { - _mm256_storeu_ps((x)+i , YMM0); - _mm256_storeu_ps((x)+i+8, YMM0); - _mm256_storeu_ps((x)+i+16, YMM0); - _mm256_storeu_ps((x)+i+24, YMM0); - } - off = (n) - ((n)%32); - for (i=0; i<((n)%32); i++) { - x[off+i] = c; - } -} - -void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM0, YMM1, YMM2, YMM3; - for (i=0; i<=((n)-16); i+=16) { - YMM0 = _mm256_loadu_ps(x+i); - YMM1 = _mm256_loadu_ps(x+i+8); - YMM2 = _mm256_loadu_ps(y+i); - YMM3 = _mm256_loadu_ps(y+i+8); - YMM2 = _mm256_div_ps(YMM0, YMM2); - YMM3 = _mm256_div_ps(YMM1, YMM3); - _mm256_storeu_ps(z+i, YMM2); - _mm256_storeu_ps(z+i+8, YMM3); - } - for (; i<(n); i++) { - z[i] = x[i] / y[i]; - } -} - -void THFloatVector_divs_AVX(float 
*y, const float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); - __m256 YMM0, YMM1; - for (i=0; i<=((n)-16); i+=16) { - YMM0 = _mm256_loadu_ps(x+i); - YMM1 = _mm256_loadu_ps(x+i+8); - YMM0 = _mm256_div_ps(YMM0, YMM15); - YMM1 = _mm256_div_ps(YMM1, YMM15); - _mm256_storeu_ps(y+i, YMM0); - _mm256_storeu_ps(y+i+8, YMM1); - } - for (; i<(n); i++) { - y[i] = x[i] / c; - } -} - -void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM0, YMM1, YMM2, YMM3; - for (i=0; i<=((n)-16); i+=16) { - YMM0 = _mm256_loadu_ps(x+i); - YMM1 = _mm256_loadu_ps(x+i+8); - YMM2 = _mm256_loadu_ps(y+i); - YMM3 = _mm256_loadu_ps(y+i+8); - YMM2 = _mm256_mul_ps(YMM0, YMM2); - YMM3 = _mm256_mul_ps(YMM1, YMM3); - _mm256_storeu_ps(z+i, YMM2); - _mm256_storeu_ps(z+i+8, YMM3); - } - for (; i<n; i++) { - z[i] = x[i] * y[i]; - } -} - -void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); - __m256 YMM0, YMM1; - for (i=0; i<=((n)-16); i+=16) { - YMM0 = _mm256_loadu_ps(x+i); - YMM1 = _mm256_loadu_ps(x+i+8); - YMM0 = _mm256_mul_ps(YMM0, YMM15); - YMM1 = _mm256_mul_ps(YMM1, YMM15); - _mm256_storeu_ps(y+i, YMM0); - _mm256_storeu_ps(y+i+8, YMM1); - } - for (; i<n; i++) { - y[i] = x[i] * c; - } -} - -void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); - __m256 YMM0, YMM1, YMM2, YMM3; - for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_ps(y+i); - YMM1 = _mm256_loadu_ps(x+i); - YMM2 = _mm256_mul_ps(YMM0, YMM15); - YMM3 = _mm256_add_ps(YMM1, YMM2); - _mm256_storeu_ps(z+i, YMM3); - } - for (; i<(n); i++) { - z[i] = x[i] + y[i] * c; - } -} - -void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM15 = 
_mm256_set_ps(c, c, c, c, c, c, c, c); - __m256 YMM0, YMM1; - for (i=0; i<=((n)-16); i+=16) { - YMM0 = _mm256_loadu_ps(x+i); - YMM1 = _mm256_loadu_ps(x+i+8); - YMM0 = _mm256_add_ps(YMM0, YMM15); - YMM1 = _mm256_add_ps(YMM1, YMM15); - _mm256_storeu_ps(y+i, YMM0); - _mm256_storeu_ps(y+i+8, YMM1); - } - for (; i<(n); i++) { - y[i] = x[i] + c; - } -} - -#endif // defined(__AVX__) diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX.h b/contrib/lua-torch/torch7/lib/TH/vector/AVX.h deleted file mode 100644 index bfaeaa6b0..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/AVX.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef TH_AVX_H -#define TH_AVX_H - -#include <stddef.h> - -void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n); -void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n); -void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n); -void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n); -void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n); -void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n); -void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n); -void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n); -void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n); -void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n); -void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n); -void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n); -void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n); -void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n); -void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, 
const float c, const ptrdiff_t n); -void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.c b/contrib/lua-torch/torch7/lib/TH/vector/AVX2.c deleted file mode 100644 index 082a680ea..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.c +++ /dev/null @@ -1,47 +0,0 @@ -#if defined(__AVX2__) -#ifndef _MSC_VER -#include <x86intrin.h> -#else -#include <intrin.h> -#endif -#include "AVX2.h" - -void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m256d YMM15 = _mm256_set_pd(c, c, c, c); - __m256d YMM0, YMM1, YMM2, YMM3; - for (i=0; i<=((n)-8); i+=8) { - YMM0 = _mm256_loadu_pd(y+i); - YMM1 = _mm256_loadu_pd(y+i+4); - YMM2 = _mm256_loadu_pd(x+i); - YMM3 = _mm256_loadu_pd(x+i+4); - YMM2 = _mm256_fmadd_pd(YMM0, YMM15, YMM2); - YMM3 = _mm256_fmadd_pd(YMM1, YMM15, YMM3); - _mm256_storeu_pd(z+i, YMM2); - _mm256_storeu_pd(z+i+4, YMM3); - } - for (; i<(n); i++) { - z[i] = x[i] + y[i] * c; - } -} - -void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c); - __m256 YMM0, YMM1, YMM2, YMM3; - for (i=0; i<=((n)-16); i+=16) { - YMM0 = _mm256_loadu_ps(y+i); - YMM1 = _mm256_loadu_ps(y+i+8); - YMM2 = _mm256_loadu_ps(x+i); - YMM3 = _mm256_loadu_ps(x+i+8); - YMM2 = _mm256_fmadd_ps(YMM0, YMM15, YMM2); - YMM3 = _mm256_fmadd_ps(YMM1, YMM15, YMM3); - _mm256_storeu_ps(z+i, YMM2); - _mm256_storeu_ps(z+i+8, YMM3); - } - for (; i<(n); i++) { - z[i] = x[i] + y[i] * c; - } -} - -#endif // defined(__AVX2__) diff --git a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.h b/contrib/lua-torch/torch7/lib/TH/vector/AVX2.h deleted file mode 100644 index 85a9e93ee..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/AVX2.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef TH_AVX2_H -#define TH_AVX2_H - -#include 
<stddef.h> - -void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n); -void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n); - -#endif diff --git a/contrib/lua-torch/torch7/lib/TH/vector/NEON.c b/contrib/lua-torch/torch7/lib/TH/vector/NEON.c deleted file mode 100644 index 7920fb13b..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/NEON.c +++ /dev/null @@ -1,105 +0,0 @@ -static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) { - long i = 0; - - for(; i < n-4; i += 4) - { - x[i] = c; - x[i+1] = c; - x[i+2] = c; - x[i+3] = c; - } - - for(; i < n; i++) - x[i] = c; - -} - -static void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) { - long i = 0; - - for(; i < n-4; i += 4) - { - z[i] = x[i] * y[i]; - z[i+1] = x[i+1] * y[i+1]; - z[i+2] = x[i+2] * y[i+2]; - z[i+3] = x[i+3] * y[i+3]; - } - - for(; i < n; i++) - z[i] = x[i] * y[i]; -} - -static void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { - long i = 0; - - for(; i < n-4; i += 4) - { - y[i] = x[i] * c; - y[i+1] = x[i+1] * c; - y[i+2] = x[i+2] * c; - y[i+3] = x[i+3] * c; - } - - for(; i < n; i++) - y[i] = x[i] * c; -} - -static void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { - long i = 0; - - for(;i < n-4; i += 4) - { - z[i] = x[i] + c * y[i]; - z[i+1] = x[i+1] + c * y[i+1]; - z[i+2] = x[i+2] + c * y[i+2]; - z[i+3] = x[i+3] + c * y[i+3]; - } - - for(; i < n; i++) - z[i] = x[i] + c * y[i]; -} - -static void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { - long i = 0; - - for(;i < n-4; i += 4) - { - y[i] = x[i] + c; - y[i+1] = x[i+1] + c; - y[i+2] = x[i+2] + c; - y[i+3] = x[i+3] + c; - } - - for(; i < n; i++) - y[i] = x[i] + c; -} - -static void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, 
const ptrdiff_t n) { - long i = 0; - - for(;i < n-4; i += 4) - { - z[i] = x[i] / y[i]; - z[i+1] = x[i+1] / y[i+1]; - z[i+2] = x[i+2] / y[i+2]; - z[i+3] = x[i+3] / y[i+3]; - } - - for(; i < n; i++) - z[i] = x[i] / y[i]; -} - -static void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) { - long i = 0; - - for(;i < n-4; i += 4) - { - y[i] = x[i] / c; - y[i+1] = x[i+1] / c; - y[i+2] = x[i+2] / c; - y[i+3] = x[i+3] / c; - } - - for(; i < n; i++) - y[i] = x[i] / c; -} diff --git a/contrib/lua-torch/torch7/lib/TH/vector/SSE.c b/contrib/lua-torch/torch7/lib/TH/vector/SSE.c deleted file mode 100644 index d026935ab..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/SSE.c +++ /dev/null @@ -1,268 +0,0 @@ -#ifndef _MSC_VER -#include <x86intrin.h> -#else -#include <intrin.h> -#endif - -static void THDoubleVector_fill_SSE(double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - ptrdiff_t off; - __m128d XMM0 = _mm_set1_pd(c); - for (i=0; i<=((n)-8); i+=8) { - _mm_storeu_pd((x)+i , XMM0); - _mm_storeu_pd((x)+i+2, XMM0); - _mm_storeu_pd((x)+i+4, XMM0); - _mm_storeu_pd((x)+i+6, XMM0); - } - off = (n) - ((n)%8); - for (i=0; i<((n)%8); i++) { - x[off+i] = c; - } -} - -static void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m128d XMM7 = _mm_set1_pd(c); - __m128d XMM0, XMM2; - for (i=0; i<=((n)-2); i+=2) { - XMM0 = _mm_loadu_pd((x)+i); - XMM2 = _mm_loadu_pd((y)+i); - XMM2 = _mm_mul_pd(XMM2, XMM7); - XMM2 = _mm_add_pd(XMM0, XMM2); - _mm_storeu_pd((z)+i, XMM2); - } - for (; i<(n); i++) { - z[i] = x[i] + c * y[i]; - } -} - -static void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m128d XMM7 = _mm_set1_pd(c); - __m128d XMM0, XMM2; - for (i=0; i<=((n)-4); i+=4) { - XMM0 = _mm_loadu_pd((x)+i); - XMM2 = _mm_loadu_pd((x)+i+2); - XMM0 = _mm_add_pd(XMM0, XMM7); - XMM2 = _mm_add_pd(XMM2, XMM7); - 
_mm_storeu_pd((y)+i, XMM0); - _mm_storeu_pd((y)+i+2, XMM2); - } - for (; i<(n); i++) { - y[i] = x[i] + c; - } -} - -static void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) { - ptrdiff_t i; - for (i=0; i<=((n)-8); i+=8) { - __m128d XMM0 = _mm_loadu_pd((x)+i ); - __m128d XMM1 = _mm_loadu_pd((x)+i+2); - __m128d XMM2 = _mm_loadu_pd((x)+i+4); - __m128d XMM3 = _mm_loadu_pd((x)+i+6); - __m128d XMM4 = _mm_loadu_pd((y)+i ); - __m128d XMM5 = _mm_loadu_pd((y)+i+2); - __m128d XMM6 = _mm_loadu_pd((y)+i+4); - __m128d XMM7 = _mm_loadu_pd((y)+i+6); - XMM4 = _mm_mul_pd(XMM4, XMM0); - XMM5 = _mm_mul_pd(XMM5, XMM1); - XMM6 = _mm_mul_pd(XMM6, XMM2); - XMM7 = _mm_mul_pd(XMM7, XMM3); - _mm_storeu_pd((z)+i , XMM4); - _mm_storeu_pd((z)+i+2, XMM5); - _mm_storeu_pd((z)+i+4, XMM6); - _mm_storeu_pd((z)+i+6, XMM7); - } - for (; i<(n); i++) { - z[i] = x[i] * y[i]; - } -} - -static void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m128d XMM15 = _mm_set1_pd(c); - for (i=0; i<=((n)-8); i+=8) { - __m128d XMM0 = _mm_loadu_pd((x)+i ); - __m128d XMM1 = _mm_loadu_pd((x)+i+2); - __m128d XMM2 = _mm_loadu_pd((x)+i+4); - __m128d XMM3 = _mm_loadu_pd((x)+i+6); - __m128d XMM4 = _mm_mul_pd(XMM15, XMM0); - __m128d XMM5 = _mm_mul_pd(XMM15, XMM1); - __m128d XMM6 = _mm_mul_pd(XMM15, XMM2); - __m128d XMM7 = _mm_mul_pd(XMM15, XMM3); - _mm_storeu_pd((y)+i , XMM4); - _mm_storeu_pd((y)+i+2, XMM5); - _mm_storeu_pd((y)+i+4, XMM6); - _mm_storeu_pd((y)+i+6, XMM7); - } - for (; i<(n); i++) { - y[i] = x[i] * c; - } -} - -static void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) { - ptrdiff_t i; - __m128d XMM0, XMM1, XMM2, XMM3; - for (i=0; i<=((n)-4); i+=4) { - XMM0 = _mm_loadu_pd(x+i); - XMM1 = _mm_loadu_pd(x+i+2); - XMM2 = _mm_loadu_pd(y+i); - XMM3 = _mm_loadu_pd(y+i+2); - XMM2 = _mm_div_pd(XMM0, XMM2); - XMM3 = _mm_div_pd(XMM1, XMM3); - _mm_storeu_pd(z+i, XMM2); - 
_mm_storeu_pd(z+i+2, XMM3); - } - for (; i<(n); i++) { - z[i] = x[i] / y[i]; - } -} - -static void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) { - ptrdiff_t i; - __m128d XMM7 = _mm_set1_pd(c); - __m128d XMM0, XMM1; - for (i=0; i<=((n)-4); i+=4) { - XMM0 = _mm_loadu_pd(x+i); - XMM1 = _mm_loadu_pd(x+i+2); - XMM0 = _mm_div_pd(XMM0, XMM7); - XMM1 = _mm_div_pd(XMM1, XMM7); - _mm_storeu_pd(y+i, XMM0); - _mm_storeu_pd(y+i+2, XMM1); - } - for (; i<(n); i++) { - y[i] = x[i] / c; - } -} - -static void THFloatVector_fill_SSE(float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m128 XMM0 = _mm_set_ps1(c); - ptrdiff_t off; - for (i=0; i<=((n)-16); i+=16) { - _mm_storeu_ps((x)+i , XMM0); - _mm_storeu_ps((x)+i+4, XMM0); - _mm_storeu_ps((x)+i+8, XMM0); - _mm_storeu_ps((x)+i+12, XMM0); - } - off = (n) - ((n)%16); - for (i=0; i<((n)%16); i++) { - x[off+i] = c; - } -} - - -static void THFloatVector_cadd_SSE(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m128 XMM7 = _mm_set_ps1(c); - __m128 XMM0, XMM2; - for (i=0; i<=((n)-4); i+=4) { - XMM0 = _mm_loadu_ps((x)+i); - XMM2 = _mm_loadu_ps((y)+i); - XMM2 = _mm_mul_ps(XMM2, XMM7); - XMM2 = _mm_add_ps(XMM0, XMM2); - _mm_storeu_ps((z)+i, XMM2); - } - for (; i<(n); i++) { - z[i] = x[i] + c * y[i]; - } -} - -static void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m128 XMM7 = _mm_set1_ps(c); - __m128 XMM0, XMM2; - for (i=0; i<=((n)-8); i+=8) { - XMM0 = _mm_loadu_ps((x)+i); - XMM2 = _mm_loadu_ps((x)+i+4); - XMM0 = _mm_add_ps(XMM0, XMM7); - XMM2 = _mm_add_ps(XMM2, XMM7); - _mm_storeu_ps((y)+i, XMM0); - _mm_storeu_ps((y)+i+4, XMM2); - } - for (; i<(n); i++) { - y[i] = x[i] + c; - } -} - -static void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) { - ptrdiff_t i; - for (i=0; i<=((n)-16); i+=16) { - __m128 XMM0 = _mm_loadu_ps((x)+i ); - __m128 XMM1 = 
_mm_loadu_ps((x)+i+ 4); - __m128 XMM2 = _mm_loadu_ps((x)+i+ 8); - __m128 XMM3 = _mm_loadu_ps((x)+i+12); - __m128 XMM4 = _mm_loadu_ps((y)+i ); - __m128 XMM5 = _mm_loadu_ps((y)+i+ 4); - __m128 XMM6 = _mm_loadu_ps((y)+i+ 8); - __m128 XMM7 = _mm_loadu_ps((y)+i+12); - XMM4 = _mm_mul_ps(XMM4, XMM0); - XMM5 = _mm_mul_ps(XMM5, XMM1); - XMM6 = _mm_mul_ps(XMM6, XMM2); - XMM7 = _mm_mul_ps(XMM7, XMM3); - _mm_storeu_ps((z)+i , XMM4); - _mm_storeu_ps((z)+i+ 4, XMM5); - _mm_storeu_ps((z)+i+ 8, XMM6); - _mm_storeu_ps((z)+i+12, XMM7); - } - for (; i<(n); i++) { - z[i] = x[i] * y[i]; - } -} - -static void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m128 XMM15 = _mm_set_ps1(c); - for (i=0; i<=((n)-16); i+=16) { - __m128 XMM0 = _mm_loadu_ps((x)+i ); - __m128 XMM1 = _mm_loadu_ps((x)+i+ 4); - __m128 XMM2 = _mm_loadu_ps((x)+i+ 8); - __m128 XMM3 = _mm_loadu_ps((x)+i+12); - __m128 XMM4 = _mm_mul_ps(XMM15, XMM0); - __m128 XMM5 = _mm_mul_ps(XMM15, XMM1); - __m128 XMM6 = _mm_mul_ps(XMM15, XMM2); - __m128 XMM7 = _mm_mul_ps(XMM15, XMM3); - _mm_storeu_ps((y)+i , XMM4); - _mm_storeu_ps((y)+i+ 4, XMM5); - _mm_storeu_ps((y)+i+ 8, XMM6); - _mm_storeu_ps((y)+i+12, XMM7); - } - for (; i<(n); i++) { - y[i] = x[i] * c; - } -} - -static void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) { - ptrdiff_t i; - __m128 XMM0, XMM1, XMM2, XMM3; - for (i=0; i<=((n)-8); i+=8) { - XMM0 = _mm_loadu_ps(x+i); - XMM1 = _mm_loadu_ps(x+i+4); - XMM2 = _mm_loadu_ps(y+i); - XMM3 = _mm_loadu_ps(y+i+4); - XMM2 = _mm_div_ps(XMM0, XMM2); - XMM3 = _mm_div_ps(XMM1, XMM3); - _mm_storeu_ps(z+i, XMM2); - _mm_storeu_ps(z+i+4, XMM3); - } - for (; i<(n); i++) { - z[i] = x[i] / y[i]; - } -} - -static void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) { - ptrdiff_t i; - __m128 XMM7 = _mm_set1_ps(c); - __m128 XMM0, XMM1; - for (i=0; i<=((n)-8); i+=8) { - XMM0 = _mm_loadu_ps(x+i); - XMM1 = 
_mm_loadu_ps(x+i+4); - XMM0 = _mm_div_ps(XMM0, XMM7); - XMM1 = _mm_div_ps(XMM1, XMM7); - _mm_storeu_ps(y+i, XMM0); - _mm_storeu_ps(y+i+4, XMM1); - } - for (; i<(n); i++) { - y[i] = x[i] / c; - } -} diff --git a/contrib/lua-torch/torch7/lib/TH/vector/VSX.c b/contrib/lua-torch/torch7/lib/TH/vector/VSX.c deleted file mode 100644 index 9ff984ad7..000000000 --- a/contrib/lua-torch/torch7/lib/TH/vector/VSX.c +++ /dev/null @@ -1,2520 +0,0 @@ -#ifdef __PPC64__ -#include <altivec.h> -#include <stddef.h> - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_fill_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_fill_VSX(double *x, const double c, const ptrdiff_t n) -{ - ptrdiff_t i; - - double val[2] = {c, c}; - vector double fp64vec2 = vec_xl(0, val); - - for (i = 0; i <= n-128; i += 128) - { - vec_xst(fp64vec2, 0, x+(i )); - vec_xst(fp64vec2, 0, x+(i+2 )); - vec_xst(fp64vec2, 0, x+(i+4 )); - vec_xst(fp64vec2, 0, x+(i+6 )); - vec_xst(fp64vec2, 0, x+(i+8 )); - vec_xst(fp64vec2, 0, x+(i+10 )); - vec_xst(fp64vec2, 0, x+(i+12 )); - vec_xst(fp64vec2, 0, x+(i+14 )); - vec_xst(fp64vec2, 0, x+(i+16 )); - vec_xst(fp64vec2, 0, x+(i+18 )); - vec_xst(fp64vec2, 0, x+(i+20 )); - vec_xst(fp64vec2, 0, x+(i+22 )); - vec_xst(fp64vec2, 0, x+(i+24 )); - vec_xst(fp64vec2, 0, x+(i+26 )); - vec_xst(fp64vec2, 0, x+(i+28 )); - vec_xst(fp64vec2, 0, x+(i+30 )); - vec_xst(fp64vec2, 0, x+(i+32 )); - vec_xst(fp64vec2, 0, x+(i+34 )); - vec_xst(fp64vec2, 0, x+(i+36 )); - vec_xst(fp64vec2, 0, x+(i+38 )); - vec_xst(fp64vec2, 0, x+(i+40 )); - vec_xst(fp64vec2, 0, x+(i+42 )); - vec_xst(fp64vec2, 0, x+(i+44 )); - vec_xst(fp64vec2, 0, x+(i+46 )); - vec_xst(fp64vec2, 0, x+(i+48 )); - vec_xst(fp64vec2, 0, x+(i+50 )); - vec_xst(fp64vec2, 0, x+(i+52 )); - vec_xst(fp64vec2, 0, x+(i+54 )); - vec_xst(fp64vec2, 0, x+(i+56 )); - vec_xst(fp64vec2, 0, x+(i+58 )); - 
vec_xst(fp64vec2, 0, x+(i+60 )); - vec_xst(fp64vec2, 0, x+(i+62 )); - vec_xst(fp64vec2, 0, x+(i+64 )); - vec_xst(fp64vec2, 0, x+(i+66 )); - vec_xst(fp64vec2, 0, x+(i+68 )); - vec_xst(fp64vec2, 0, x+(i+70 )); - vec_xst(fp64vec2, 0, x+(i+72 )); - vec_xst(fp64vec2, 0, x+(i+74 )); - vec_xst(fp64vec2, 0, x+(i+76 )); - vec_xst(fp64vec2, 0, x+(i+78 )); - vec_xst(fp64vec2, 0, x+(i+80 )); - vec_xst(fp64vec2, 0, x+(i+82 )); - vec_xst(fp64vec2, 0, x+(i+84 )); - vec_xst(fp64vec2, 0, x+(i+86 )); - vec_xst(fp64vec2, 0, x+(i+88 )); - vec_xst(fp64vec2, 0, x+(i+90 )); - vec_xst(fp64vec2, 0, x+(i+92 )); - vec_xst(fp64vec2, 0, x+(i+94 )); - vec_xst(fp64vec2, 0, x+(i+96 )); - vec_xst(fp64vec2, 0, x+(i+98 )); - vec_xst(fp64vec2, 0, x+(i+100)); - vec_xst(fp64vec2, 0, x+(i+102)); - vec_xst(fp64vec2, 0, x+(i+104)); - vec_xst(fp64vec2, 0, x+(i+106)); - vec_xst(fp64vec2, 0, x+(i+108)); - vec_xst(fp64vec2, 0, x+(i+110)); - vec_xst(fp64vec2, 0, x+(i+112)); - vec_xst(fp64vec2, 0, x+(i+114)); - vec_xst(fp64vec2, 0, x+(i+116)); - vec_xst(fp64vec2, 0, x+(i+118)); - vec_xst(fp64vec2, 0, x+(i+120)); - vec_xst(fp64vec2, 0, x+(i+122)); - vec_xst(fp64vec2, 0, x+(i+124)); - vec_xst(fp64vec2, 0, x+(i+126)); - } - for (; i <= n-16; i += 16) - { - vec_xst(fp64vec2, 0, x+(i )); - vec_xst(fp64vec2, 0, x+(i+2 )); - vec_xst(fp64vec2, 0, x+(i+4 )); - vec_xst(fp64vec2, 0, x+(i+6 )); - vec_xst(fp64vec2, 0, x+(i+8 )); - vec_xst(fp64vec2, 0, x+(i+10 )); - vec_xst(fp64vec2, 0, x+(i+12 )); - vec_xst(fp64vec2, 0, x+(i+14 )); - } - for (; i <= n-2; i += 2) - vec_xst(fp64vec2, 0, x+(i )); - for (; i < n; i++) - x[i] = c; -} - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_cadds_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_cadd_VSX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) -{ - ptrdiff_t i; - - double val[2] = {c, c}; - 
vector double c_fp64vec2 = vec_xl(0, val); - - vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; - vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; - vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; - vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; - - - for (i = 0; i <= n-24; i += 24) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - y1_fp64vec2 = vec_xl(0, y+(i+2 )); - y2_fp64vec2 = vec_xl(0, y+(i+4 )); - y3_fp64vec2 = vec_xl(0, y+(i+6 )); - y4_fp64vec2 = vec_xl(0, y+(i+8 )); - y5_fp64vec2 = vec_xl(0, y+(i+10)); - y6_fp64vec2 = vec_xl(0, y+(i+12)); - y7_fp64vec2 = vec_xl(0, y+(i+14)); - y8_fp64vec2 = vec_xl(0, y+(i+16)); - y9_fp64vec2 = vec_xl(0, y+(i+18)); - y10_fp64vec2 = vec_xl(0, y+(i+20)); - y11_fp64vec2 = vec_xl(0, y+(i+22)); - - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - x4_fp64vec2 = vec_xl(0, x+(i+8 )); - x5_fp64vec2 = vec_xl(0, x+(i+10)); - x6_fp64vec2 = vec_xl(0, x+(i+12)); - x7_fp64vec2 = vec_xl(0, x+(i+14)); - x8_fp64vec2 = vec_xl(0, x+(i+16)); - x9_fp64vec2 = vec_xl(0, x+(i+18)); - x10_fp64vec2 = vec_xl(0, x+(i+20)); - x11_fp64vec2 = vec_xl(0, x+(i+22)); - - y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2); - y1_fp64vec2 = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2); - y2_fp64vec2 = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2); - y3_fp64vec2 = vec_madd(y3_fp64vec2, c_fp64vec2, x3_fp64vec2); - y4_fp64vec2 = vec_madd(y4_fp64vec2, c_fp64vec2, x4_fp64vec2); - y5_fp64vec2 = vec_madd(y5_fp64vec2, c_fp64vec2, x5_fp64vec2); - y6_fp64vec2 = vec_madd(y6_fp64vec2, c_fp64vec2, x6_fp64vec2); - y7_fp64vec2 = vec_madd(y7_fp64vec2, c_fp64vec2, x7_fp64vec2); - y8_fp64vec2 = vec_madd(y8_fp64vec2, c_fp64vec2, x8_fp64vec2); - y9_fp64vec2 = vec_madd(y9_fp64vec2, c_fp64vec2, x9_fp64vec2); 
- y10_fp64vec2 = vec_madd(y10_fp64vec2, c_fp64vec2,x10_fp64vec2); - y11_fp64vec2 = vec_madd(y11_fp64vec2, c_fp64vec2,x11_fp64vec2); - - vec_xst(y0_fp64vec2, 0, z+(i )); - vec_xst(y1_fp64vec2, 0, z+(i+2 )); - vec_xst(y2_fp64vec2, 0, z+(i+4 )); - vec_xst(y3_fp64vec2, 0, z+(i+6 )); - vec_xst(y4_fp64vec2, 0, z+(i+8 )); - vec_xst(y5_fp64vec2, 0, z+(i+10)); - vec_xst(y6_fp64vec2, 0, z+(i+12)); - vec_xst(y7_fp64vec2, 0, z+(i+14)); - vec_xst(y8_fp64vec2, 0, z+(i+16)); - vec_xst(y9_fp64vec2, 0, z+(i+18)); - vec_xst(y10_fp64vec2, 0, z+(i+20)); - vec_xst(y11_fp64vec2, 0, z+(i+22)); - } - for (; i <= n-8; i += 8) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - y1_fp64vec2 = vec_xl(0, y+(i+2 )); - y2_fp64vec2 = vec_xl(0, y+(i+4 )); - y3_fp64vec2 = vec_xl(0, y+(i+6 )); - - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - - y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2); - y1_fp64vec2 = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2); - y2_fp64vec2 = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2); - y3_fp64vec2 = vec_madd(y3_fp64vec2, c_fp64vec2, x3_fp64vec2); - - vec_xst(y0_fp64vec2, 0, z+(i )); - vec_xst(y1_fp64vec2, 0, z+(i+2 )); - vec_xst(y2_fp64vec2, 0, z+(i+4 )); - vec_xst(y3_fp64vec2, 0, z+(i+6 )); - } - for (; i <= n-2; i += 2) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - x0_fp64vec2 = vec_xl(0, x+(i )); - y0_fp64vec2 = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2); - vec_xst(y0_fp64vec2, 0, z+(i )); - } - for (; i < n; i++) - z[i] = x[i] + c* y[i]; -} - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_adds_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_adds_VSX(double *y, const double *x, const double c, const ptrdiff_t n) -{ - ptrdiff_t i; - - double val[2] = {c, c}; - vector double c_fp64vec2 = vec_xl(0, val); - 
- vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; - vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; - vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; - vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; - - - for (i = 0; i <= n-24; i += 24) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - x4_fp64vec2 = vec_xl(0, x+(i+8 )); - x5_fp64vec2 = vec_xl(0, x+(i+10)); - x6_fp64vec2 = vec_xl(0, x+(i+12)); - x7_fp64vec2 = vec_xl(0, x+(i+14)); - x8_fp64vec2 = vec_xl(0, x+(i+16)); - x9_fp64vec2 = vec_xl(0, x+(i+18)); - x10_fp64vec2 = vec_xl(0, x+(i+20)); - x11_fp64vec2 = vec_xl(0, x+(i+22)); - - y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_add(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_add(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_add(x3_fp64vec2, c_fp64vec2); - y4_fp64vec2 = vec_add(x4_fp64vec2, c_fp64vec2); - y5_fp64vec2 = vec_add(x5_fp64vec2, c_fp64vec2); - y6_fp64vec2 = vec_add(x6_fp64vec2, c_fp64vec2); - y7_fp64vec2 = vec_add(x7_fp64vec2, c_fp64vec2); - y8_fp64vec2 = vec_add(x8_fp64vec2, c_fp64vec2); - y9_fp64vec2 = vec_add(x9_fp64vec2, c_fp64vec2); - y10_fp64vec2 = vec_add(x10_fp64vec2, c_fp64vec2); - y11_fp64vec2 = vec_add(x11_fp64vec2, c_fp64vec2); - - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - vec_xst(y4_fp64vec2, 0, y+(i+8 )); - vec_xst(y5_fp64vec2, 0, y+(i+10)); - vec_xst(y6_fp64vec2, 0, y+(i+12)); - vec_xst(y7_fp64vec2, 0, y+(i+14)); - vec_xst(y8_fp64vec2, 0, y+(i+16)); - vec_xst(y9_fp64vec2, 0, y+(i+18)); - vec_xst(y10_fp64vec2, 0, y+(i+20)); - vec_xst(y11_fp64vec2, 0, y+(i+22)); - } - for (; i <= n-8; i += 8) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - 
x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - - y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_add(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_add(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_add(x3_fp64vec2, c_fp64vec2); - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - } - for (; i <= n-2; i += 2) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - y0_fp64vec2 = vec_add(x0_fp64vec2, c_fp64vec2); - vec_xst(y0_fp64vec2, 0, y+(i )); - } - for (; i < n; i++) - y[i] = x[i] +c; -} - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_cmul_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_cmul_VSX(double *z, const double *x, const double *y, const ptrdiff_t n) -{ - ptrdiff_t i; - - vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; - vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; - vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; - vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; - - - for (i = 0; i <= n-24; i += 24) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - y1_fp64vec2 = vec_xl(0, y+(i+2 )); - y2_fp64vec2 = vec_xl(0, y+(i+4 )); - y3_fp64vec2 = vec_xl(0, y+(i+6 )); - y4_fp64vec2 = vec_xl(0, y+(i+8 )); - y5_fp64vec2 = vec_xl(0, y+(i+10)); - y6_fp64vec2 = vec_xl(0, y+(i+12)); - y7_fp64vec2 = vec_xl(0, y+(i+14)); - y8_fp64vec2 = vec_xl(0, y+(i+16)); - y9_fp64vec2 = vec_xl(0, y+(i+18)); - y10_fp64vec2 = vec_xl(0, y+(i+20)); - y11_fp64vec2 = vec_xl(0, y+(i+22)); - - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - 
x3_fp64vec2 = vec_xl(0, x+(i+6 )); - x4_fp64vec2 = vec_xl(0, x+(i+8 )); - x5_fp64vec2 = vec_xl(0, x+(i+10)); - x6_fp64vec2 = vec_xl(0, x+(i+12)); - x7_fp64vec2 = vec_xl(0, x+(i+14)); - x8_fp64vec2 = vec_xl(0, x+(i+16)); - x9_fp64vec2 = vec_xl(0, x+(i+18)); - x10_fp64vec2 = vec_xl(0, x+(i+20)); - x11_fp64vec2 = vec_xl(0, x+(i+22)); - - y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2); - y1_fp64vec2 = vec_mul(y1_fp64vec2, x1_fp64vec2); - y2_fp64vec2 = vec_mul(y2_fp64vec2, x2_fp64vec2); - y3_fp64vec2 = vec_mul(y3_fp64vec2, x3_fp64vec2); - y4_fp64vec2 = vec_mul(y4_fp64vec2, x4_fp64vec2); - y5_fp64vec2 = vec_mul(y5_fp64vec2, x5_fp64vec2); - y6_fp64vec2 = vec_mul(y6_fp64vec2, x6_fp64vec2); - y7_fp64vec2 = vec_mul(y7_fp64vec2, x7_fp64vec2); - y8_fp64vec2 = vec_mul(y8_fp64vec2, x8_fp64vec2); - y9_fp64vec2 = vec_mul(y9_fp64vec2, x9_fp64vec2); - y10_fp64vec2 = vec_mul(y10_fp64vec2, x10_fp64vec2); - y11_fp64vec2 = vec_mul(y11_fp64vec2, x11_fp64vec2); - - vec_xst(y0_fp64vec2, 0, z+(i )); - vec_xst(y1_fp64vec2, 0, z+(i+2 )); - vec_xst(y2_fp64vec2, 0, z+(i+4 )); - vec_xst(y3_fp64vec2, 0, z+(i+6 )); - vec_xst(y4_fp64vec2, 0, z+(i+8 )); - vec_xst(y5_fp64vec2, 0, z+(i+10)); - vec_xst(y6_fp64vec2, 0, z+(i+12)); - vec_xst(y7_fp64vec2, 0, z+(i+14)); - vec_xst(y8_fp64vec2, 0, z+(i+16)); - vec_xst(y9_fp64vec2, 0, z+(i+18)); - vec_xst(y10_fp64vec2, 0, z+(i+20)); - vec_xst(y11_fp64vec2, 0, z+(i+22)); - } - for (; i <= n-8; i += 8) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - y1_fp64vec2 = vec_xl(0, y+(i+2 )); - y2_fp64vec2 = vec_xl(0, y+(i+4 )); - y3_fp64vec2 = vec_xl(0, y+(i+6 )); - - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - - y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2); - y1_fp64vec2 = vec_mul(y1_fp64vec2, x1_fp64vec2); - y2_fp64vec2 = vec_mul(y2_fp64vec2, x2_fp64vec2); - y3_fp64vec2 = vec_mul(y3_fp64vec2, x3_fp64vec2); - - vec_xst(y0_fp64vec2, 0, z+(i )); - vec_xst(y1_fp64vec2, 0, 
z+(i+2 )); - vec_xst(y2_fp64vec2, 0, z+(i+4 )); - vec_xst(y3_fp64vec2, 0, z+(i+6 )); - } - for (; i <= n-2; i += 2) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - x0_fp64vec2 = vec_xl(0, x+(i )); - y0_fp64vec2 = vec_mul(y0_fp64vec2, x0_fp64vec2); - vec_xst(y0_fp64vec2, 0, z+(i )); - } - for (; i < n; i++) - z[i] = x[i] * y[i]; -} - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_muls_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_muls_VSX(double *y, const double *x, const double c, const ptrdiff_t n) -{ - ptrdiff_t i; - - double val[2] = {c, c}; - vector double c_fp64vec2 = vec_xl(0, val); - - vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; - vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; - vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; - vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; - - - for (i = 0; i <= n-24; i += 24) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - x4_fp64vec2 = vec_xl(0, x+(i+8 )); - x5_fp64vec2 = vec_xl(0, x+(i+10)); - x6_fp64vec2 = vec_xl(0, x+(i+12)); - x7_fp64vec2 = vec_xl(0, x+(i+14)); - x8_fp64vec2 = vec_xl(0, x+(i+16)); - x9_fp64vec2 = vec_xl(0, x+(i+18)); - x10_fp64vec2 = vec_xl(0, x+(i+20)); - x11_fp64vec2 = vec_xl(0, x+(i+22)); - - y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_mul(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_mul(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_mul(x3_fp64vec2, c_fp64vec2); - y4_fp64vec2 = vec_mul(x4_fp64vec2, c_fp64vec2); - y5_fp64vec2 = vec_mul(x5_fp64vec2, c_fp64vec2); - y6_fp64vec2 = vec_mul(x6_fp64vec2, c_fp64vec2); - y7_fp64vec2 = 
vec_mul(x7_fp64vec2, c_fp64vec2); - y8_fp64vec2 = vec_mul(x8_fp64vec2, c_fp64vec2); - y9_fp64vec2 = vec_mul(x9_fp64vec2, c_fp64vec2); - y10_fp64vec2 = vec_mul(x10_fp64vec2, c_fp64vec2); - y11_fp64vec2 = vec_mul(x11_fp64vec2, c_fp64vec2); - - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - vec_xst(y4_fp64vec2, 0, y+(i+8 )); - vec_xst(y5_fp64vec2, 0, y+(i+10)); - vec_xst(y6_fp64vec2, 0, y+(i+12)); - vec_xst(y7_fp64vec2, 0, y+(i+14)); - vec_xst(y8_fp64vec2, 0, y+(i+16)); - vec_xst(y9_fp64vec2, 0, y+(i+18)); - vec_xst(y10_fp64vec2, 0, y+(i+20)); - vec_xst(y11_fp64vec2, 0, y+(i+22)); - } - for (; i <= n-8; i += 8) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - - y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_mul(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_mul(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_mul(x3_fp64vec2, c_fp64vec2); - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - } - for (; i <= n-2; i += 2) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - y0_fp64vec2 = vec_mul(x0_fp64vec2, c_fp64vec2); - vec_xst(y0_fp64vec2, 0, y+(i )); - } - for (; i < n; i++) - y[i] = c * x[i]; -} - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_cdiv_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_cdiv_VSX(double *z, const double *x, const double *y, const ptrdiff_t n) -{ - ptrdiff_t i; - - vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; - vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; - vector double x0_fp64vec2, 
x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; - vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; - - - for (i = 0; i <= n-24; i += 24) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - y1_fp64vec2 = vec_xl(0, y+(i+2 )); - y2_fp64vec2 = vec_xl(0, y+(i+4 )); - y3_fp64vec2 = vec_xl(0, y+(i+6 )); - y4_fp64vec2 = vec_xl(0, y+(i+8 )); - y5_fp64vec2 = vec_xl(0, y+(i+10)); - y6_fp64vec2 = vec_xl(0, y+(i+12)); - y7_fp64vec2 = vec_xl(0, y+(i+14)); - y8_fp64vec2 = vec_xl(0, y+(i+16)); - y9_fp64vec2 = vec_xl(0, y+(i+18)); - y10_fp64vec2 = vec_xl(0, y+(i+20)); - y11_fp64vec2 = vec_xl(0, y+(i+22)); - - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - x4_fp64vec2 = vec_xl(0, x+(i+8 )); - x5_fp64vec2 = vec_xl(0, x+(i+10)); - x6_fp64vec2 = vec_xl(0, x+(i+12)); - x7_fp64vec2 = vec_xl(0, x+(i+14)); - x8_fp64vec2 = vec_xl(0, x+(i+16)); - x9_fp64vec2 = vec_xl(0, x+(i+18)); - x10_fp64vec2 = vec_xl(0, x+(i+20)); - x11_fp64vec2 = vec_xl(0, x+(i+22)); - - y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2); - y1_fp64vec2 = vec_div(x1_fp64vec2, y1_fp64vec2); - y2_fp64vec2 = vec_div(x2_fp64vec2, y2_fp64vec2); - y3_fp64vec2 = vec_div(x3_fp64vec2, y3_fp64vec2); - y4_fp64vec2 = vec_div(x4_fp64vec2, y4_fp64vec2); - y5_fp64vec2 = vec_div(x5_fp64vec2, y5_fp64vec2); - y6_fp64vec2 = vec_div(x6_fp64vec2, y6_fp64vec2); - y7_fp64vec2 = vec_div(x7_fp64vec2, y7_fp64vec2); - y8_fp64vec2 = vec_div(x8_fp64vec2, y8_fp64vec2); - y9_fp64vec2 = vec_div(x9_fp64vec2, y9_fp64vec2); - y10_fp64vec2 = vec_div(x10_fp64vec2, y10_fp64vec2); - y11_fp64vec2 = vec_div(x11_fp64vec2, y11_fp64vec2); - - vec_xst(y0_fp64vec2, 0, z+(i )); - vec_xst(y1_fp64vec2, 0, z+(i+2 )); - vec_xst(y2_fp64vec2, 0, z+(i+4 )); - vec_xst(y3_fp64vec2, 0, z+(i+6 )); - vec_xst(y4_fp64vec2, 0, z+(i+8 )); - vec_xst(y5_fp64vec2, 0, z+(i+10)); - vec_xst(y6_fp64vec2, 0, z+(i+12)); - vec_xst(y7_fp64vec2, 0, 
z+(i+14)); - vec_xst(y8_fp64vec2, 0, z+(i+16)); - vec_xst(y9_fp64vec2, 0, z+(i+18)); - vec_xst(y10_fp64vec2, 0, z+(i+20)); - vec_xst(y11_fp64vec2, 0, z+(i+22)); - } - for (; i <= n-8; i += 8) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - y1_fp64vec2 = vec_xl(0, y+(i+2 )); - y2_fp64vec2 = vec_xl(0, y+(i+4 )); - y3_fp64vec2 = vec_xl(0, y+(i+6 )); - - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - - y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2); - y1_fp64vec2 = vec_div(x1_fp64vec2, y1_fp64vec2); - y2_fp64vec2 = vec_div(x2_fp64vec2, y2_fp64vec2); - y3_fp64vec2 = vec_div(x3_fp64vec2, y3_fp64vec2); - - vec_xst(y0_fp64vec2, 0, z+(i )); - vec_xst(y1_fp64vec2, 0, z+(i+2 )); - vec_xst(y2_fp64vec2, 0, z+(i+4 )); - vec_xst(y3_fp64vec2, 0, z+(i+6 )); - } - for (; i <= n-2; i += 2) - { - y0_fp64vec2 = vec_xl(0, y+(i )); - x0_fp64vec2 = vec_xl(0, x+(i )); - y0_fp64vec2 = vec_div(x0_fp64vec2, y0_fp64vec2); - vec_xst(y0_fp64vec2, 0, z+(i )); - } - for (; i < n; i++) - z[i] = x[i] / y[i]; -} - - -//-------------------------------------------------------------------------------------------------- -// THDoubleVector_divs_VSX: -//-------------------------------------------------------------------------------------------------- -static void THDoubleVector_divs_VSX(double *y, const double *x, const double c, const ptrdiff_t n) -{ - ptrdiff_t i; - - double val[2] = {c, c}; - vector double c_fp64vec2 = vec_xl(0, val); - - vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2; - vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2; - vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2; - vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2; - - - for (i = 0; i <= n-24; i += 24) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, 
x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - x4_fp64vec2 = vec_xl(0, x+(i+8 )); - x5_fp64vec2 = vec_xl(0, x+(i+10)); - x6_fp64vec2 = vec_xl(0, x+(i+12)); - x7_fp64vec2 = vec_xl(0, x+(i+14)); - x8_fp64vec2 = vec_xl(0, x+(i+16)); - x9_fp64vec2 = vec_xl(0, x+(i+18)); - x10_fp64vec2 = vec_xl(0, x+(i+20)); - x11_fp64vec2 = vec_xl(0, x+(i+22)); - - y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); - y4_fp64vec2 = vec_div(x4_fp64vec2, c_fp64vec2); - y5_fp64vec2 = vec_div(x5_fp64vec2, c_fp64vec2); - y6_fp64vec2 = vec_div(x6_fp64vec2, c_fp64vec2); - y7_fp64vec2 = vec_div(x7_fp64vec2, c_fp64vec2); - y8_fp64vec2 = vec_div(x8_fp64vec2, c_fp64vec2); - y9_fp64vec2 = vec_div(x9_fp64vec2, c_fp64vec2); - y10_fp64vec2 = vec_div(x10_fp64vec2, c_fp64vec2); - y11_fp64vec2 = vec_div(x11_fp64vec2, c_fp64vec2); - - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - vec_xst(y4_fp64vec2, 0, y+(i+8 )); - vec_xst(y5_fp64vec2, 0, y+(i+10)); - vec_xst(y6_fp64vec2, 0, y+(i+12)); - vec_xst(y7_fp64vec2, 0, y+(i+14)); - vec_xst(y8_fp64vec2, 0, y+(i+16)); - vec_xst(y9_fp64vec2, 0, y+(i+18)); - vec_xst(y10_fp64vec2, 0, y+(i+20)); - vec_xst(y11_fp64vec2, 0, y+(i+22)); - } - for (; i <= n-8; i += 8) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+2 )); - x2_fp64vec2 = vec_xl(0, x+(i+4 )); - x3_fp64vec2 = vec_xl(0, x+(i+6 )); - - y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - - vec_xst(y0_fp64vec2, 0, 
y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+2 )); - vec_xst(y2_fp64vec2, 0, y+(i+4 )); - vec_xst(y3_fp64vec2, 0, y+(i+6 )); - } - for (; i <= n-2; i += 2) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); - vec_xst(y0_fp64vec2, 0, y+(i )); - } - for (; i < n; i++) - y[i] = x[i] / c; -} - - -//-------------------------------------------------------------------------------------------------- -// THFloatVector_fill_VSX: -//-------------------------------------------------------------------------------------------------- -static void THFloatVector_fill_VSX(float *x, const float c, const ptrdiff_t n) -{ - ptrdiff_t i; - - float val[4] = {c, c, c, c}; - vector float fp32vec4 = vec_xl(0, val); - - for (i = 0; i <= n-256; i += 256) - { - vec_xst(fp32vec4, 0, x+(i )); - vec_xst(fp32vec4, 0, x+(i+4 )); - vec_xst(fp32vec4, 0, x+(i+8 )); - vec_xst(fp32vec4, 0, x+(i+12 )); - vec_xst(fp32vec4, 0, x+(i+16 )); - vec_xst(fp32vec4, 0, x+(i+20 )); - vec_xst(fp32vec4, 0, x+(i+24 )); - vec_xst(fp32vec4, 0, x+(i+28 )); - vec_xst(fp32vec4, 0, x+(i+32 )); - vec_xst(fp32vec4, 0, x+(i+36 )); - vec_xst(fp32vec4, 0, x+(i+40 )); - vec_xst(fp32vec4, 0, x+(i+44 )); - vec_xst(fp32vec4, 0, x+(i+48 )); - vec_xst(fp32vec4, 0, x+(i+52 )); - vec_xst(fp32vec4, 0, x+(i+56 )); - vec_xst(fp32vec4, 0, x+(i+60 )); - vec_xst(fp32vec4, 0, x+(i+64 )); - vec_xst(fp32vec4, 0, x+(i+68 )); - vec_xst(fp32vec4, 0, x+(i+72 )); - vec_xst(fp32vec4, 0, x+(i+76 )); - vec_xst(fp32vec4, 0, x+(i+80 )); - vec_xst(fp32vec4, 0, x+(i+84 )); - vec_xst(fp32vec4, 0, x+(i+88 )); - vec_xst(fp32vec4, 0, x+(i+92 )); - vec_xst(fp32vec4, 0, x+(i+96 )); - vec_xst(fp32vec4, 0, x+(i+100)); - vec_xst(fp32vec4, 0, x+(i+104)); - vec_xst(fp32vec4, 0, x+(i+108)); - vec_xst(fp32vec4, 0, x+(i+112)); - vec_xst(fp32vec4, 0, x+(i+116)); - vec_xst(fp32vec4, 0, x+(i+120)); - vec_xst(fp32vec4, 0, x+(i+124)); - vec_xst(fp32vec4, 0, x+(i+128)); - vec_xst(fp32vec4, 0, x+(i+132)); - vec_xst(fp32vec4, 0, x+(i+136)); - 
vec_xst(fp32vec4, 0, x+(i+140)); - vec_xst(fp32vec4, 0, x+(i+144)); - vec_xst(fp32vec4, 0, x+(i+148)); - vec_xst(fp32vec4, 0, x+(i+152)); - vec_xst(fp32vec4, 0, x+(i+156)); - vec_xst(fp32vec4, 0, x+(i+160)); - vec_xst(fp32vec4, 0, x+(i+164)); - vec_xst(fp32vec4, 0, x+(i+168)); - vec_xst(fp32vec4, 0, x+(i+172)); - vec_xst(fp32vec4, 0, x+(i+176)); - vec_xst(fp32vec4, 0, x+(i+180)); - vec_xst(fp32vec4, 0, x+(i+184)); - vec_xst(fp32vec4, 0, x+(i+188)); - vec_xst(fp32vec4, 0, x+(i+192)); - vec_xst(fp32vec4, 0, x+(i+196)); - vec_xst(fp32vec4, 0, x+(i+200)); - vec_xst(fp32vec4, 0, x+(i+204)); - vec_xst(fp32vec4, 0, x+(i+208)); - vec_xst(fp32vec4, 0, x+(i+212)); - vec_xst(fp32vec4, 0, x+(i+216)); - vec_xst(fp32vec4, 0, x+(i+220)); - vec_xst(fp32vec4, 0, x+(i+224)); - vec_xst(fp32vec4, 0, x+(i+228)); - vec_xst(fp32vec4, 0, x+(i+232)); - vec_xst(fp32vec4, 0, x+(i+236)); - vec_xst(fp32vec4, 0, x+(i+240)); - vec_xst(fp32vec4, 0, x+(i+244)); - vec_xst(fp32vec4, 0, x+(i+248)); - vec_xst(fp32vec4, 0, x+(i+252)); - } - for (; i <= n-32; i += 32) - { - vec_xst(fp32vec4, 0, x+(i )); - vec_xst(fp32vec4, 0, x+(i+4 )); - vec_xst(fp32vec4, 0, x+(i+8 )); - vec_xst(fp32vec4, 0, x+(i+12 )); - vec_xst(fp32vec4, 0, x+(i+16 )); - vec_xst(fp32vec4, 0, x+(i+20 )); - vec_xst(fp32vec4, 0, x+(i+24 )); - vec_xst(fp32vec4, 0, x+(i+28 )); - } - for (; i <= n-4; i += 4) - vec_xst(fp32vec4, 0, x+(i )); - for (; i < n; i++) - x[i] = c; -} - - -//-------------------------------------------------------------------------------------------------- -// THFloatVector_cadd_VSX: -//-------------------------------------------------------------------------------------------------- -static void THFloatVector_cadd_VSX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) -{ - ptrdiff_t i; - - float val[4] = {c, c, c, c}; - vector float c_fp32vec4 = vec_xl(0, val); - - vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; - vector 
//--------------------------------------------------------------------------------------------------
// THFloatVector_cadd_VSX:
//--------------------------------------------------------------------------------------------------
// z[i] = x[i] + c * y[i] for i in [0, n), using VSX fused multiply-adds
// for the vectorizable prefix and a scalar loop for the tail.
static void THFloatVector_cadd_VSX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n)
{
    ptrdiff_t i = 0;

    // Broadcast c into all four lanes.
    float splat[4] = {c, c, c, c};
    vector float cv = vec_xl(0, splat);
    vector float xv0, xv1, xv2, xv3;
    vector float yv0, yv1, yv2, yv3;

    // Bulk: four vectors (16 floats) per iteration.
    for (; i <= n - 16; i += 16)
    {
        yv0 = vec_xl(0, y + i);
        yv1 = vec_xl(0, y + i + 4);
        yv2 = vec_xl(0, y + i + 8);
        yv3 = vec_xl(0, y + i + 12);

        xv0 = vec_xl(0, x + i);
        xv1 = vec_xl(0, x + i + 4);
        xv2 = vec_xl(0, x + i + 8);
        xv3 = vec_xl(0, x + i + 12);

        // vec_madd(y, c, x) computes y*c + x in one instruction.
        vec_xst(vec_madd(yv0, cv, xv0), 0, z + i);
        vec_xst(vec_madd(yv1, cv, xv1), 0, z + i + 4);
        vec_xst(vec_madd(yv2, cv, xv2), 0, z + i + 8);
        vec_xst(vec_madd(yv3, cv, xv3), 0, z + i + 12);
    }
    // Cleanup: one vector at a time.
    for (; i <= n - 4; i += 4)
    {
        yv0 = vec_xl(0, y + i);
        xv0 = vec_xl(0, x + i);
        vec_xst(vec_madd(yv0, cv, xv0), 0, z + i);
    }
    // Scalar tail.
    for (; i < n; i++)
        z[i] = x[i] + c * y[i];
}
//--------------------------------------------------------------------------------------------------
// THFloatVector_adds_VSX:
//--------------------------------------------------------------------------------------------------
// y[i] = c + x[i] for i in [0, n): vectorized scalar-plus-array add.
static void THFloatVector_adds_VSX(float *y, const float *x, const float c, const ptrdiff_t n)
{
    ptrdiff_t i = 0;

    // Broadcast c into all four lanes.
    float splat[4] = {c, c, c, c};
    vector float cv = vec_xl(0, splat);
    vector float xv0, xv1, xv2, xv3;

    // Bulk: four vectors (16 floats) per iteration.
    for (; i <= n - 16; i += 16)
    {
        xv0 = vec_xl(0, x + i);
        xv1 = vec_xl(0, x + i + 4);
        xv2 = vec_xl(0, x + i + 8);
        xv3 = vec_xl(0, x + i + 12);

        vec_xst(vec_add(xv0, cv), 0, y + i);
        vec_xst(vec_add(xv1, cv), 0, y + i + 4);
        vec_xst(vec_add(xv2, cv), 0, y + i + 8);
        vec_xst(vec_add(xv3, cv), 0, y + i + 12);
    }
    // Cleanup: one vector at a time.
    for (; i <= n - 4; i += 4)
    {
        xv0 = vec_xl(0, x + i);
        vec_xst(vec_add(xv0, cv), 0, y + i);
    }
    // Scalar tail.
    for (; i < n; i++)
        y[i] = c + x[i];
}
//--------------------------------------------------------------------------------------------------
// THFloatVector_cmul_VSX:
//--------------------------------------------------------------------------------------------------
// z[i] = y[i] * x[i] for i in [0, n): vectorized element-wise multiply.
// NOTE: parameter order is (z, y, x), matching the original signature.
static void THFloatVector_cmul_VSX(float *z, const float *y, const float *x, const ptrdiff_t n)
{
    ptrdiff_t i = 0;

    vector float yv0, yv1, yv2, yv3;
    vector float xv0, xv1, xv2, xv3;

    // Bulk: four vectors (16 floats) per iteration.
    for (; i <= n - 16; i += 16)
    {
        yv0 = vec_xl(0, y + i);
        yv1 = vec_xl(0, y + i + 4);
        yv2 = vec_xl(0, y + i + 8);
        yv3 = vec_xl(0, y + i + 12);

        xv0 = vec_xl(0, x + i);
        xv1 = vec_xl(0, x + i + 4);
        xv2 = vec_xl(0, x + i + 8);
        xv3 = vec_xl(0, x + i + 12);

        vec_xst(vec_mul(yv0, xv0), 0, z + i);
        vec_xst(vec_mul(yv1, xv1), 0, z + i + 4);
        vec_xst(vec_mul(yv2, xv2), 0, z + i + 8);
        vec_xst(vec_mul(yv3, xv3), 0, z + i + 12);
    }
    // Cleanup: one vector at a time.
    for (; i <= n - 4; i += 4)
    {
        yv0 = vec_xl(0, y + i);
        xv0 = vec_xl(0, x + i);
        vec_xst(vec_mul(yv0, xv0), 0, z + i);
    }
    // Scalar tail.
    for (; i < n; i++)
        z[i] = y[i] * x[i];
}
c_fp32vec4); - y11_fp32vec4 = vec_mul(x11_fp32vec4, c_fp32vec4); - - vec_xst(y0_fp32vec4, 0, y+(i )); - vec_xst(y1_fp32vec4, 0, y+(i+4 )); - vec_xst(y2_fp32vec4, 0, y+(i+8 )); - vec_xst(y3_fp32vec4, 0, y+(i+12)); - vec_xst(y4_fp32vec4, 0, y+(i+16)); - vec_xst(y5_fp32vec4, 0, y+(i+20)); - vec_xst(y6_fp32vec4, 0, y+(i+24)); - vec_xst(y7_fp32vec4, 0, y+(i+28)); - vec_xst(y8_fp32vec4, 0, y+(i+32)); - vec_xst(y9_fp32vec4, 0, y+(i+36)); - vec_xst(y10_fp32vec4, 0, y+(i+40)); - vec_xst(y11_fp32vec4, 0, y+(i+44)); - } - for (; i <= n-16; i += 16) - { - x0_fp32vec4 = vec_xl(0, x+(i )); - x1_fp32vec4 = vec_xl(0, x+(i+4 )); - x2_fp32vec4 = vec_xl(0, x+(i+8 )); - x3_fp32vec4 = vec_xl(0, x+(i+12)); - - y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4); - y1_fp32vec4 = vec_mul(x1_fp32vec4, c_fp32vec4); - y2_fp32vec4 = vec_mul(x2_fp32vec4, c_fp32vec4); - y3_fp32vec4 = vec_mul(x3_fp32vec4, c_fp32vec4); - - vec_xst(y0_fp32vec4, 0, y+(i )); - vec_xst(y1_fp32vec4, 0, y+(i+4 )); - vec_xst(y2_fp32vec4, 0, y+(i+8 )); - vec_xst(y3_fp32vec4, 0, y+(i+12)); - } - for (; i <= n-4; i += 4) - { - x0_fp32vec4 = vec_xl(0, x+(i )); - y0_fp32vec4 = vec_mul(x0_fp32vec4, c_fp32vec4); - vec_xst(y0_fp32vec4, 0, y+(i )); - } - for (; i < n; i++) - y[i] = c * x[i]; -} - - -//-------------------------------------------------------------------------------------------------- -// THFloatVector_cdiv_VSX: -//-------------------------------------------------------------------------------------------------- -static void THFloatVector_cdiv_VSX(float *z, const float *x, const float *y, const ptrdiff_t n) -{ - ptrdiff_t i; - - vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4; - vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4; - vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4; - vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4; - - - for (i = 0; i <= 
//--------------------------------------------------------------------------------------------------
// THFloatVector_cdiv_VSX:
//--------------------------------------------------------------------------------------------------
// z[i] = x[i] / y[i] for i in [0, n): vectorized element-wise divide.
static void THFloatVector_cdiv_VSX(float *z, const float *x, const float *y, const ptrdiff_t n)
{
    ptrdiff_t i = 0;

    vector float xv0, xv1, xv2, xv3;
    vector float yv0, yv1, yv2, yv3;

    // Bulk: four vectors (16 floats) per iteration.
    for (; i <= n - 16; i += 16)
    {
        xv0 = vec_xl(0, x + i);
        xv1 = vec_xl(0, x + i + 4);
        xv2 = vec_xl(0, x + i + 8);
        xv3 = vec_xl(0, x + i + 12);

        yv0 = vec_xl(0, y + i);
        yv1 = vec_xl(0, y + i + 4);
        yv2 = vec_xl(0, y + i + 8);
        yv3 = vec_xl(0, y + i + 12);

        // Numerator is x, denominator is y.
        vec_xst(vec_div(xv0, yv0), 0, z + i);
        vec_xst(vec_div(xv1, yv1), 0, z + i + 4);
        vec_xst(vec_div(xv2, yv2), 0, z + i + 8);
        vec_xst(vec_div(xv3, yv3), 0, z + i + 12);
    }
    // Cleanup: one vector at a time.
    for (; i <= n - 4; i += 4)
    {
        xv0 = vec_xl(0, x + i);
        yv0 = vec_xl(0, y + i);
        vec_xst(vec_div(xv0, yv0), 0, z + i);
    }
    // Scalar tail.
    for (; i < n; i++)
        z[i] = x[i] / y[i];
}
vec_xl(0, x+(i+24)); - x7_fp64vec2 = vec_xl(0, x+(i+28)); - x8_fp64vec2 = vec_xl(0, x+(i+32)); - x9_fp64vec2 = vec_xl(0, x+(i+36)); - x10_fp64vec2 = vec_xl(0, x+(i+40)); - x11_fp64vec2 = vec_xl(0, x+(i+44)); - - y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); - y4_fp64vec2 = vec_div(x4_fp64vec2, c_fp64vec2); - y5_fp64vec2 = vec_div(x5_fp64vec2, c_fp64vec2); - y6_fp64vec2 = vec_div(x6_fp64vec2, c_fp64vec2); - y7_fp64vec2 = vec_div(x7_fp64vec2, c_fp64vec2); - y8_fp64vec2 = vec_div(x8_fp64vec2, c_fp64vec2); - y9_fp64vec2 = vec_div(x9_fp64vec2, c_fp64vec2); - y10_fp64vec2 = vec_div(x10_fp64vec2, c_fp64vec2); - y11_fp64vec2 = vec_div(x11_fp64vec2, c_fp64vec2); - - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+4 )); - vec_xst(y2_fp64vec2, 0, y+(i+8 )); - vec_xst(y3_fp64vec2, 0, y+(i+12 )); - vec_xst(y4_fp64vec2, 0, y+(i+16 )); - vec_xst(y5_fp64vec2, 0, y+(i+20)); - vec_xst(y6_fp64vec2, 0, y+(i+24)); - vec_xst(y7_fp64vec2, 0, y+(i+28)); - vec_xst(y8_fp64vec2, 0, y+(i+32)); - vec_xst(y9_fp64vec2, 0, y+(i+36)); - vec_xst(y10_fp64vec2, 0, y+(i+40)); - vec_xst(y11_fp64vec2, 0, y+(i+44)); - } - for (; i <= n-16; i += 16) - { - x0_fp64vec2 = vec_xl(0, x+(i )); - x1_fp64vec2 = vec_xl(0, x+(i+4 )); - x2_fp64vec2 = vec_xl(0, x+(i+8 )); - x3_fp64vec2 = vec_xl(0, x+(i+12 )); - - y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); - y1_fp64vec2 = vec_div(x1_fp64vec2, c_fp64vec2); - y2_fp64vec2 = vec_div(x2_fp64vec2, c_fp64vec2); - y3_fp64vec2 = vec_div(x3_fp64vec2, c_fp64vec2); - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+4 )); - vec_xst(y2_fp64vec2, 0, y+(i+8 )); - vec_xst(y3_fp64vec2, 0, y+(i+12 )); - - vec_xst(y0_fp64vec2, 0, y+(i )); - vec_xst(y1_fp64vec2, 0, y+(i+4 )); - vec_xst(y2_fp64vec2, 0, y+(i+8 )); - vec_xst(y3_fp64vec2, 0, y+(i+16 )); - } - for (; i <= n-4; i += 4) - { - x0_fp64vec2 = 
vec_xl(0, x+(i )); - y0_fp64vec2 = vec_div(x0_fp64vec2, c_fp64vec2); - vec_xst(y0_fp64vec2, 0, y+(i )); - } - for (; i < n; i++) - y[i] = x[i] / c; -} - - -//------------------------------------------------ -// -// Testing for correctness and performance -// -// If you want to run these tests, compile this -// file with -DRUN_VSX_TESTS on a Power machine, -// and then run the executable that is generated. -// -//------------------------------------------------ -// -// Example passing run (from a Power8 machine): -// -// $ gcc VSX.c -O2 -D RUN_VSX_TESTS -o vsxtest -// $ ./vsxtest -// -// TODO -// -// -// Finished runnning all tests. All tests PASSED. -// -//------------------------------------------------ -#ifdef RUN_VSX_TESTS - -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <assert.h> -#include <math.h> - -#define VSX_PERF_NUM_TEST_ELEMENTS 100000000 -#define VSX_FUNC_NUM_TEST_ELEMENTS 2507 - - -//-------------------------------------------------------------------------------------------------- -// Standard implementations: -//-------------------------------------------------------------------------------------------------- -static void standardDouble_fill(double *x, const double c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - x[i] = c; -} - -static void standardFloat_fill(float *x, const float c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - x[i] = c; -} - -static void standardDouble_cadd(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - z[i] = x[i] + c * y[i]; -} - -static void standardFloat_cadd(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - z[i] = x[i] + c * y[i]; -} - -static void standardDouble_adds(double *y, const double *x, const double c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - y[i] = c + x[i]; -} - -static void standardFloat_adds(float *y, const 
float *x, const float c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - y[i] = c + x[i]; -} - -static void standardDouble_cmul(double *z, const double *x, const double *y, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - z[i] = x[i] * y[i]; -} - -static void standardFloat_cmul(float *z, const float *x, const float *y, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - z[i] = x[i] * y[i]; -} - -static void standardDouble_muls(double *y, const double *x, const double c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - y[i] = c * x[i]; -} - -static void standardFloat_muls(float *y, const float *x, const float c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - y[i] = c * x[i]; -} - -static void standardDouble_cdiv(double *z, const double *x, const double *y, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - z[i] = x[i] / y[i]; -} - -static void standardFloat_cdiv(float *z, const float *x, const float *y, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - z[i] = x[i] / y[i]; -} - -static void standardDouble_divs(double *y, const double *x, const double c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - y[i] = x[i] / c; -} - -static void standardFloat_divs(float *y, const float *x, const float c, const ptrdiff_t n) -{ - for (ptrdiff_t i = 0; i < n; i++) - y[i] = x[i] / c; -} - -double randDouble() -{ - return (double)(rand()%100)/(double)(rand()%100) * (rand()%2 ? -1.0 : 1.0); -} - -int near(double a, double b) -{ - int aClass = fpclassify(a); - int bClass = fpclassify(b); - - if(aClass != bClass) // i.e. is it NAN, infinite, or finite...? - return 0; - - if(aClass == FP_INFINITE) // if it is infinite, the sign must be the same, i.e. 
positive infinity is not near negative infinity - return (signbit(a) == signbit(b)); - else if(aClass == FP_NORMAL) // if it is a normal number then check the magnitude of the difference between the numbers - return fabs(a - b) < 0.001; - else // if both number are of the same class as each other and are of any other class (i.e. such as NAN), then they are near to each other. - return 1; -} - - -//-------------------------------------------------------------------------------------------------- -// Standard tests: -//-------------------------------------------------------------------------------------------------- -void test_THDoubleVector_fill_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *x_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - - double yVal0 = 17.2; - double yVal1 = 8.2; - double yVal2 = 5.1; - double yVal3 = -0.9; - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_fill() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1); - THDoubleVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2); - THDoubleVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = 
(double)(end - start) / CLOCKS_PER_SEC; - printf("THDoubleVector_fill_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - yVal0 += 1.0; - yVal1 += 1.0; - yVal2 += 1.0; - yVal3 -= 1.0; - - standardDouble_fill( x_standard, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS); - THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - assert(x_optimized[i] == yVal0); - - standardDouble_fill( x_standard+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_fill_VSX(x_optimized+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_fill( x_standard+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_fill_VSX(x_optimized+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_fill( x_standard+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_fill_VSX(x_optimized+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_fill( x_standard+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_fill( x_standard+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - assert(x_optimized[i] == x_standard[i]); - printf("All assertions PASSED for THDoubleVector_fill_VSX() test.\n\n"); - - - free(x_standard); - free(x_optimized); -} - - -void test_THFloatVector_fill_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *x_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - - float yVal0 = 17.2; - float yVal1 = 8.2; - float yVal2 = 5.1; - float yVal3 = -0.9; 
- - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardFloat_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_fill() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_fill_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - yVal0 += 1.0; - yVal1 += 1.0; - yVal2 += 1.0; - yVal3 -= 1.0; - - standardFloat_fill( x_standard, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS); - THFloatVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - assert(x_optimized[i] == yVal0); - - standardFloat_fill( x_standard+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_fill_VSX(x_optimized+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_fill( x_standard+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_fill_VSX(x_optimized+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_fill( x_standard+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_fill_VSX(x_optimized+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6); 
- standardFloat_fill( x_standard+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THFloatVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_fill( x_standard+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - assert(x_optimized[i] == x_standard[i]); - printf("All assertions PASSED for THFloatVector_fill_VSX() test.\n\n"); - - - free(x_standard); - free(x_optimized); -} - - -void test_THDoubleVector_cadd_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double c = randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = randDouble(); - y[i] = randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_cadd() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - 
THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THDoubleVector_cadd_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardDouble_cadd( z_standard+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_cadd_VSX(z_optimized+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_cadd( z_standard+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_cadd_VSX(z_optimized+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_cadd( z_standard+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_cadd_VSX(z_optimized+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_cadd( z_standard+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_cadd( z_standard+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(z_optimized[i], z_standard[i])) - printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); - assert(near(z_optimized[i], z_standard[i])); - } - printf("All assertions PASSED for THDoubleVector_cadd_VSX() test.\n\n"); - - - free(z_standard); - free(z_optimized); - free(x); -} - -void test_THFloatVector_cadd_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x = (float 
*)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float c = (float)randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = (float)randDouble(); - y[i] = (float)randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_cadd() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_cadd_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardFloat_cadd( z_standard+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_cadd_VSX(z_optimized+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_cadd( z_standard+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_cadd_VSX(z_optimized+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_cadd( z_standard+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_cadd_VSX(z_optimized+3, x, y, 
c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardFloat_cadd( z_standard+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THFloatVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_cadd( z_standard+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(z_optimized[i], z_standard[i])) - printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); - assert(near(z_optimized[i], z_standard[i])); - } - printf("All assertions PASSED for THFloatVector_cadd_VSX() test.\n\n"); - - - free(z_standard); - free(z_optimized); - free(x); -} - -void test_THDoubleVector_adds_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double c = randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - x[i] = randDouble(); - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_adds() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_adds_VSX(y_optimized, x, 
c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THDoubleVector_adds_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardDouble_adds( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_adds_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_adds( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_adds_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_adds( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_adds_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_adds( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_adds( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(y_optimized[i], y_standard[i])) - printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); - assert(near(y_optimized[i], y_standard[i])); - } - printf("All assertions PASSED for THDoubleVector_adds_VSX() test.\n\n"); - - - free(y_standard); - free(y_optimized); - free(x); -} - - -void test_THFloatVector_adds_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x = (float 
*)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float c = (float)randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - x[i] = (float)randDouble(); - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_adds() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_adds_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardFloat_adds( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_adds_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_adds( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_adds_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_adds( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_adds_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardFloat_adds( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - 
THFloatVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_adds( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(y_optimized[i], y_standard[i])) - printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); - assert(near(y_optimized[i], y_standard[i])); - } - printf("All assertions PASSED for THFloatVector_adds_VSX() test.\n\n"); - - - free(y_standard); - free(y_optimized); - free(x); -} - - -void test_THDoubleVector_cmul_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = randDouble(); - y[i] = randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_cmul() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - 
THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THDoubleVector_cmul_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardDouble_cmul( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_cmul_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_cmul( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_cmul_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_cmul( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_cmul_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_cmul( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_cmul( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(z_optimized[i], z_standard[i])) - printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); - assert(near(z_optimized[i], z_standard[i])); - } - printf("All assertions PASSED for THDoubleVector_cmul_VSX() test.\n\n"); - - - free(z_standard); - free(z_optimized); - free(x); -} - -void test_THFloatVector_cmul_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float 
*y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = (float)randDouble(); - y[i] = (float)randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_cmul() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_cmul_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardFloat_cmul( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_cmul_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_cmul( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_cmul_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_cmul( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_cmul_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardFloat_cmul( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - 
THFloatVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_cmul( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(z_optimized[i], z_standard[i])) - printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); - assert(near(z_optimized[i], z_standard[i])); - } - printf("All assertions PASSED for THFloatVector_cmul_VSX() test.\n\n"); - - - free(z_standard); - free(z_optimized); - free(x); -} - -void test_THDoubleVector_muls_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double c = randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_muls() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - 
THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THDoubleVector_muls_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardDouble_muls( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_muls( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_muls( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_muls( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_muls( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(y_optimized[i], y_standard[i])) - printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); - assert(near(y_optimized[i], y_standard[i])); - } - printf("All assertions PASSED for THDoubleVector_muls_VSX() test.\n\n"); - - - free(y_standard); - free(y_optimized); - free(x); -} - -void test_THFloatVector_muls_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float c = (float)randDouble(); - - // Initialize randomly - for(int i = 0; i < 
VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = (float)randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_muls() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_muls_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardFloat_muls( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_muls( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_muls( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardFloat_muls( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THFloatVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_muls( y_standard+517+r, x, c, 
VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(y_optimized[i], y_standard[i])) - printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); - assert(near(y_optimized[i], y_standard[i])); - } - printf("All assertions PASSED for THFloatVector_muls_VSX() test.\n\n"); - - - free(y_standard); - free(y_optimized); - free(x); -} - - - -void test_THDoubleVector_cdiv_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *z_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *y = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = randDouble(); - y[i] = randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_cdiv() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end 
= clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THDoubleVector_cdiv_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardDouble_cdiv( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_cdiv( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_cdiv( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_cdiv( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_cdiv( z_standard+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(z_optimized[i], z_standard[i])) - printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); - assert(near(z_optimized[i], z_standard[i])); - } - printf("All assertions PASSED for THDoubleVector_cdiv_VSX() test.\n\n"); - - - free(z_standard); - free(z_optimized); - free(x); -} - -void test_THFloatVector_cdiv_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *z_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *y = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] 
= (float)randDouble(); - y[i] = (float)randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_cdiv() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_cdiv_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardFloat_cdiv( z_standard+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_cdiv( z_standard+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_cdiv( z_standard+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardFloat_cdiv( z_standard+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THFloatVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_cdiv( z_standard+517+r, x, y, 
VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(z_optimized[i], z_standard[i])) - printf("%d %f %f\n", i, z_optimized[i], z_standard[i]); - assert(near(z_optimized[i], z_standard[i])); - } - printf("All assertions PASSED for THFloatVector_cdiv_VSX() test.\n\n"); - - - free(z_standard); - free(z_optimized); - free(x); -} - -void test_THDoubleVector_divs_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - double *y_standard = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double *x = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double)); - double c = randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = randDouble(); - } - - - //------------------------------------------------- - // Performance Test - //------------------------------------------------- - start = clock(); - standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardDouble_divs() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / 
CLOCKS_PER_SEC; - printf("THDoubleVector_divs_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardDouble_divs( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THDoubleVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardDouble_divs( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THDoubleVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardDouble_divs( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THDoubleVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardDouble_divs( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THDoubleVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardDouble_divs( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THDoubleVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - - for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(y_optimized[i], y_standard[i])) - printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); - assert(near(y_optimized[i], y_standard[i])); - } - printf("All assertions PASSED for THDoubleVector_divs_VSX() test.\n\n"); - - - free(y_standard); - free(y_optimized); - free(x); -} - -void test_THFloatVector_divs_VSX() -{ - clock_t start, end; - double elapsedSeconds_optimized, elapsedSeconds_standard; - - float *y_standard = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float *x = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float)); - float c = (float)randDouble(); - - // Initialize randomly - for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++) - { - x[i] = (float)randDouble(); - } - - - //------------------------------------------------- - // Performance 
Test - //------------------------------------------------- - start = clock(); - standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC; - printf("standardFloat_divs() test took %.5lf seconds\n", elapsedSeconds_standard); - - start = clock(); - THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS ); - THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1); - THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2); - THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3); - end = clock(); - - elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC; - printf("THFloatVector_divs_VSX() test took %.5lf seconds\n", elapsedSeconds_optimized); - - - //------------------------------------------------- - // Correctness Test - //------------------------------------------------- - standardFloat_divs( y_standard+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - THFloatVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2); - standardFloat_divs( y_standard+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - THFloatVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4); - standardFloat_divs( y_standard+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - THFloatVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6); - standardFloat_divs( y_standard+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - THFloatVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029); - int r = rand() % 258; - standardFloat_divs( y_standard+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - THFloatVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100)); - - for(int i = 0; i < 
VSX_FUNC_NUM_TEST_ELEMENTS; i++) - { - if(!near(y_optimized[i], y_standard[i])) - printf("%d %f %f\n", i, y_optimized[i], y_standard[i]); - assert(near(y_optimized[i], y_standard[i])); - } - printf("All assertions PASSED for THFloatVector_divs_VSX() test.\n\n"); - - - free(y_standard); - free(y_optimized); - free(x); -} - - -//-------------------------------------------------------------------------------------------------- -// Run tests: -//-------------------------------------------------------------------------------------------------- -int main() -{ - printf("\n"); - - - // First test utility functions - - assert(!near(0.1, -0.1)); - assert(!near(0.1f, -0.1f)); - assert(!near(9, 10)); - assert(near(0.1, 0.1000001)); - assert(near(0.1f, 0.1000001f)); - assert(near(100.764, 100.764)); - assert(!near(NAN, 0.0)); - assert(!near(-9.5, NAN)); - assert(!near(NAN, 100)); - assert(!near(-0.0, NAN)); - assert(near(NAN, NAN)); - assert(near(INFINITY, INFINITY)); - assert(near(-INFINITY, -INFINITY)); - assert(!near(INFINITY, NAN)); - assert(!near(0, INFINITY)); - assert(!near(-999.4324, INFINITY)); - assert(!near(INFINITY, 982374.1)); - assert(!near(-INFINITY, INFINITY)); - - - - // Then test each vectorized function - - test_THDoubleVector_fill_VSX(); - test_THFloatVector_fill_VSX(); - - test_THDoubleVector_cadd_VSX(); - test_THFloatVector_cadd_VSX(); - - test_THDoubleVector_adds_VSX(); - test_THFloatVector_adds_VSX(); - - test_THDoubleVector_cmul_VSX(); - test_THFloatVector_cmul_VSX(); - - test_THDoubleVector_muls_VSX(); - test_THFloatVector_muls_VSX(); - - test_THDoubleVector_cdiv_VSX(); - test_THFloatVector_cdiv_VSX(); - - test_THDoubleVector_divs_VSX(); - test_THFloatVector_divs_VSX(); - - - - printf("Finished runnning all tests. 
All tests PASSED.\n"); - return 0; -} - - -#endif // defined RUN_VSX_TESTS - -#endif // defined __PPC64__ - diff --git a/contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt b/contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt deleted file mode 100644 index 518c407f2..000000000 --- a/contrib/lua-torch/torch7/lib/luaT/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -# avoid some cmake warnings - -INCLUDE_DIRECTORIES(${LUA_INCDIR}) -IF(LUALIB) - LINK_DIRECTORIES(${LUA_LIBDIR}) # note: must be done before defining target -ENDIF() - -ADD_LIBRARY(luaT STATIC luaT.h luaT.c) - -IF(LUALIB) - TARGET_LINK_LIBRARIES(luaT ${LUALIB}) # must be done after ;) -ENDIF() diff --git a/contrib/lua-torch/torch7/lib/luaT/README.md b/contrib/lua-torch/torch7/lib/luaT/README.md deleted file mode 100644 index 235b8edc0..000000000 --- a/contrib/lua-torch/torch7/lib/luaT/README.md +++ /dev/null @@ -1,266 +0,0 @@ -<a name="luat.dok"></a> -# Lua Torch C API # - -luaT provides an API to interface Lua and C in Torch packages. It defines a -concept of _classes_ to Lua for Torch, and provides a mechanism to easily -handle these Lua classes from C. - -It additionally provides few functions that `luaL` should have defined, and -defines several functions similar to `luaL` ones for better type error printing when using -`luaT` classes. - -<a name="luat.memory.dok"></a> -## Memory functions ## - -Classical memory allocation functions which generate a Lua error in case of -problem. - -<a name="luaT_alloc"></a> -### void* luaT_alloc(lua_State *L, long size) ### - -Allocates `size` bytes, and return a pointer on the allocated -memory. A Lua error will be generated if running out of memory. - -<a name="luaT_realloc"></a> -### void* luaT_realloc(lua_State *L, void *ptr, long size) ### - -Realloc `ptr` to `size` bytes. `ptr` must have been previously -allocated with [luaT_alloc](#luaT_alloc) or -[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc` -functions. 
A Lua error will be generated if running out of memory. - -<a name="luaT_free"></a> -### void luaT_free(lua_State *L, void *ptr) ### - -Free memory allocated at address `ptr`. The memory must have been -previously allocated with [luaT_alloc](#luaT_alloc) or -[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc` -functions. - -<a name="luat.classcreate"></a> -## Class creation and basic handling ## - -A `luaT` class is basically either a Lua _table_ or _userdata_ with -an appropriate _metatable_. This appropriate metatable is created with -[luaT_newmetatable](#luaT_newmetatable). Contrary to luaL userdata -functions, luaT mechanism handles inheritance. If the class inherit from -another class, then the metatable will itself have a metatable -corresponding to the _parent metatable_: the metatables are cascaded -according to the class inheritance. Multiple inheritance is not supported. - -<a name="luat.operatoroverloading"></a> -### Operator overloading ### - -The metatable of a `luaT` object contains `Lua` operators like -`__index`, `__newindex`, `__tostring`, `__add` -(etc...). These operators will respectively look for `__index__`, -`__newindex__`, `__tostring__`, `__add__` (etc...) in the -metatable. If found, the corresponding function or value will be returned, -else a Lua error will be raised. - -If one wants to provide `__index__` or `__newindex__` in the -metaclass, these operators must follow a particular scheme: - - * `__index__` must either return a value _and_ `true` or return `false` only. In the first case, it means `__index__` was able to handle the given argument (for e.g., the type was correct). The second case means it was not able to do anything, so `__index` in the root metatable can then try to see if the metaclass contains the required value. - - * `__newindex__` must either return `true` or `false`. As for `__index__`, `true` means it could handle the argument and `false` not. 
If not, the root metatable `__newindex` will then raise an error if the object was a userdata, or apply a rawset if the object was a Lua table. - -Other metaclass operators like `__tostring__`, `__add__`, etc... do not have any particular constraint. - -<a name="luat_newlocalmetatable"></a> -### const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx) ### - -This function creates a new metatable, which is the Lua way to define a new -object class. As for `luaL_newmetatable`, the metatable is registered in -the Lua registry table, with the key `tname`. In addition, `tname` is -also registered in the Lua registry, with the metatable as key (the -typename of a given object can be thus easily retrieved). - -The class name `tname` must be of the form `modulename.classname`. If not -NULL, `parenttname` must be a valid typename corresponding to the parent -class of the new class. - -If `constructor` is not NULL, a function `new` will be added to the -metatable, pointing to this given function. - -A "constructor table" will be created by `luaT_newlocalmetatable`: it will -contain all the class methods, and be callable, calling the `constructor`, if -a `constructor` has been passed. The constructor table is either stored into -`modulename.classname` (that is in the global namespace) if `moduleidx <= -0` or in the table at index `moduleidx` in the stack (if `moduleidx > 0`). - -If not NULL, `destructor` will be called when garbage collecting the object. - -If not NULL, `factory` must be a Lua C function creating an empty object -instance of the class. This functions are used in Torch for serialization. - -Note that classes can be partly defined in C and partly defined in Lua: -once the metatable is created in C, it can be filled up with additional -methods in Lua. - -The return value is the value returned by [luaT_typenameid](#luat_typenameid). 
- -<a name="luat_newmetatable"></a> -### const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) ### - -Same as [luaT_newlocalmetatable](#luat_newmetatable), but where the -constructor table is assigned in the global namespace (`moduleidx = 0`). - -<a name="luat_pushmetatable"></a> -### int luaT_pushmetatable(lua_State *L, const name *tname) ### - -Push the metatable with type name `tname` on the stack, if `tname` is a -valid Torch class name (previously registered with luaT_newmetatable). - -On success, returns 1. If `tname` is invalid, nothing is pushed and it -returns 0. - -<a name="luat_typenameid"></a> -### const char* luaT_typenameid(lua_State *L, const char *tname) ### - -If `tname` is a valid Torch class name, then returns a unique string (the -contents will be the same as `tname`) pointing to the string registered -in the Lua registry. This string is thus valid as long as Lua is -running. The returned string shall not be freed. - -If `tname` is an invalid class name, returns NULL. - -<a name="luat_typename"></a> -### const char* luaT_typename(lua_State *L, int ud) ### - -Returns the typename of the object at index `ud` on the stack. If it is -not a valid Torch object, returns NULL. - -<a name="luat_pushudata"></a> -### void luaT_pushudata(lua_State *L, void *udata, const char *tname) ### - -Given a C structure `udata`, push a userdata object on the stack with -metatable corresponding to `tname`. Obviously, `tname` must be a valid -Torch name registered with [luaT_newmetatable](#luat_newmetatable). - -<a name="luat_toudata"></a> -### void *luaT_toudata(lua_State *L, int ud, const char *tname) ### - -Returns a pointer to the original C structure previously pushed on the -stack with [luaT_pushudata](#luat_pushudata), if the object at index -`ud` is a valid Torch class name. Returns NULL otherwise. 
- -<a name="luat_isudata"></a> -### int luaT_isudata(lua_State *L, int ud, const char *tname) ### - -Returns 1 if the object at index `ud` on the stack is a valid Torch class name `tname`. -Returns 0 otherwise. - -<a name="luat_getfield"></a> -### Checking fields of a table ### - -This functions check that the table at the given index `ud` on the Lua -stack has a field named `field`, and that it is of the specified type. -These function raises a Lua error on failure. - -<a name="luat_getfieldcheckudata"></a> -## void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) ## - -Checks that the field named `field` of the table at index `ud` is a -Torch class name `tname`. Returns the pointer of the C structure -previously pushed on the stack with [luaT_pushudata](#luat_pushudata) on -success. The function raises a Lua error on failure. - -<a name="luat_getfieldchecklightudata"></a> -## void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) ## - -Checks that the field named `field` of the table at index `ud` is a -lightuserdata. Returns the lightuserdata pointer on success. The function -raises a Lua error on failure. - -<a name="luat_getfieldcheckint"></a> -## int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) ## - -Checks that the field named `field` of the table at index `ud` is an -int. Returns the int value pointer on success. The function raises a Lua -error on failure. - -<a name="luat_getfieldcheckstring"></a> -## const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) ## - -Checks that the field named `field` of the table at index `ud` is a -string. Returns a pointer to the string on success. The function raises a -Lua error on failure. - -<a name="luat_getfieldcheckboolean"></a> -## int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) ## - -Checks that the field named `field` of the table at index `ud` is a -boolean. 
On success, returns 1 if the boolean is `true`, 0 if it is -`false`. The function raises a Lua error on failure. - -<a name="luat_getfieldchecktable"></a> -## void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) ## - -Checks that the field named `field` of the table at index `ud` is a -table. On success, push the table on the stack. The function raises a Lua -error on failure. - -<a name="luat_typerror"></a> -### int luaT_typerror(lua_State *L, int ud, const char *tname) ### - -Raises a `luaL_argerror` (and returns its value), claiming that the -object at index `ud` on the stack is not of type `tname`. Note that -this function does not check the type, it only raises an error. - -<a name="luat_checkboolean"></a> -### int luaT_checkboolean(lua_State *L, int ud) ### - -Checks that the value at index `ud` is a boolean. On success, returns 1 -if the boolean is `true`, 0 if it is `false`. The function raises a Lua -error on failure. - -<a name="luat_optboolean"></a> -### int luaT_optboolean(lua_State *L, int ud, int def) ### - -Checks that the value at index `ud` is a boolean. On success, returns 1 -if the boolean is `true`, 0 if it is `false`. If there is no value at -index `ud`, returns `def`. In any other cases, raises an error. - -<a name="luat_registeratname"></a> -### void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) ### - -This function assume a table is on the stack. It creates a table field -`name` in the table (if this field does not exist yet), and fill up -`methods` in this table field. - -<a name="luat_classrootname"></a> -### const char *luaT_classrootname(const char *tname) ### - -Assuming `tname` is of the form `A.b.c`, returns 'c'. The returned value -shall not be freed. It is a pointer inside `tname` string. 
- -<a name="luat_classmodulename"></a> -### int luaT_classmodulename(const char *tname, char *parent_name) ### -Alias to `luaT_fullparentname ` for ensuring backwards compatibility; -use of `luaT_fullparentname` is preferred. - -<a name="luat_fullparentname"></a> -### int luaT_fullparentname(const char *tname, char *parent_name) ### - -Returns a 0-1 valued integer indicating whether `tname` has a parent module. -Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `A.b`. - -<a name="luat_classmodulename"></a> -### int luaT_outerparentname(const char *tname, char *parent_name) ### - -Returns a 0-1 valued integer indicating whether `tname` has a parent module. -Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `A`. - -<a name="luat_classmodulename"></a> -### int luaT_innerparentname(const char *tname, char *parent_name) ### - -Returns a 0-1 valued integer indicating whether `tname` has a parent module. -Assuming `tname` is of the form `A.b.c`, sets `parent_name` to `b`. - -<a name="luat_stackdump"></a> -### void luaT_stackdump(lua_State *L) ### - -This function print outs the state of the Lua stack. It is useful for debug -purposes. - diff --git a/contrib/lua-torch/torch7/lib/luaT/luaT.c b/contrib/lua-torch/torch7/lib/luaT/luaT.c deleted file mode 100644 index d87f5d54c..000000000 --- a/contrib/lua-torch/torch7/lib/luaT/luaT.c +++ /dev/null @@ -1,1373 +0,0 @@ -#include <stdlib.h> -#include <string.h> -#include <stdint.h> - -#include "luaT.h" - -void* luaT_alloc(lua_State *L, ptrdiff_t size) -{ - void *ptr; - - if(size == 0) - return NULL; - - if(size < 0) - luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?"); - - ptr = malloc(size); - if(!ptr) - luaL_error(L, "$ Torch: not enough memory: you tried to allocate %dGB. 
Buy new RAM!", size/1073741824); - - return ptr; -} - -void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size) -{ - if(!ptr) - return(luaT_alloc(L, size)); - - if(size == 0) - { - luaT_free(L, ptr); - return NULL; - } - - if(size < 0) - luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?"); - - ptr = realloc(ptr, size); - if(!ptr) - luaL_error(L, "$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824); - return ptr; -} - -void luaT_free(lua_State *L, void *ptr) -{ - free(ptr); -} - -void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup) -{ -#if LUA_VERSION_NUM == 501 - luaL_checkstack(L, nup+1, "too many upvalues"); - for (; l->name != NULL; l++) { /* fill the table with given functions */ - int i; - lua_pushstring(L, l->name); - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -(nup+1)); - lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ - lua_settable(L, -(nup + 3)); - } - lua_pop(L, nup); /* remove upvalues */ -#else - luaL_setfuncs(L, l, nup); -#endif -} - -void luaT_stackdump(lua_State *L) -{ - int i; - const char *tname = NULL; - int top = lua_gettop(L); - for(i = 1; i <= top; i++) - { - int t = lua_type(L, i); - printf("%3d. ", i); - switch(t) - { - case LUA_TSTRING: - printf("'%s'", lua_tostring(L,i)); - break; - case LUA_TBOOLEAN: - printf(lua_toboolean(L, i) ? "true" : "false"); - break; - case LUA_TNUMBER: - printf("%g", lua_tonumber(L,i)); - break; - case LUA_TUSERDATA: - tname = luaT_typename(L, i); - printf("userdata %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object")); - break; - case 10: - tname = luaT_typename(L, i); - printf("cdata %p [%s]", lua_topointer(L, i), (tname ? 
tname : "not a Torch object")); - break; - case LUA_TTABLE: - lua_pushvalue(L, i); - lua_rawget(L, LUA_REGISTRYINDEX); - if(lua_isstring(L, -1)) - tname = lua_tostring(L, -1); /*luaT_typenameid(L, lua_tostring(L, -1)); */ - else - tname = NULL; - lua_pop(L, 1); - if(tname) - printf("metatable [%s]", tname); - else - { - tname = luaT_typename(L, i); - printf("table %p [%s]", lua_topointer(L, i), (tname ? tname : "not a Torch object")); - } - break; - default: - printf("Lua object type: %s", lua_typename(L,t)); - break; - } - printf("\n"); - } - printf("---------------------------------------------\n"); -} - -/* metatable operator methods */ -static int luaT_mt__index(lua_State *L); -static int luaT_mt__newindex(lua_State *L); -static int luaT_mt__tostring(lua_State *L); -static int luaT_mt__add(lua_State *L); -static int luaT_mt__sub(lua_State *L); -static int luaT_mt__mul(lua_State *L); -static int luaT_mt__div(lua_State *L); -static int luaT_mt__mod(lua_State *L); -static int luaT_mt__pow(lua_State *L); -static int luaT_mt__unm(lua_State *L); -static int luaT_mt__concat(lua_State *L); -static int luaT_mt__len(lua_State *L); -static int luaT_mt__eq(lua_State *L); -static int luaT_mt__lt(lua_State *L); -static int luaT_mt__le(lua_State *L); -static int luaT_mt__call(lua_State *L); - -/* Constructor-metatable methods */ -static int luaT_cmt__call(lua_State *L); -static int luaT_cmt__newindex(lua_State *L); - -const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parent_tname, - lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) -{ - return luaT_newlocalmetatable(L, tname, parent_tname, - constructor, destructor, factory, 0); -} - -const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname, - lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx) -{ - lua_pushcfunction(L, luaT_lua_newmetatable); - lua_pushstring(L, tname); - (parent_tname ? 
(void)lua_pushstring(L, parent_tname) : lua_pushnil(L)); - (constructor ? lua_pushcfunction(L, constructor) : lua_pushnil(L)); - (destructor ? lua_pushcfunction(L, destructor) : lua_pushnil(L)); - (factory ? lua_pushcfunction(L, factory) : lua_pushnil(L)); - (moduleidx > 0 ? lua_pushvalue(L, moduleidx) : lua_pushnil(L)); - lua_call(L, 6, 1); - return luaT_typenameid(L, tname); -} - -int luaT_pushmetatable(lua_State *L, const char *tname) -{ - lua_getfield(L, LUA_REGISTRYINDEX, tname); - if(lua_isnil(L, -1)) - { - lua_pop(L, 1); - return 0; - } - return 1; -} - -const char *luaT_typenameid(lua_State *L, const char *tname) -{ - if(luaT_pushmetatable(L, tname)) - { - const char *tnameid = NULL; - lua_rawget(L, LUA_REGISTRYINDEX); - if(lua_isstring(L, -1)) - tnameid = lua_tostring(L, -1); - lua_pop(L, 1); /* the string/nil */ - return tnameid; - } - return NULL; -} - -static const char cdataname[] = "" - "local ok, ffi = pcall(require, 'ffi')\n" - "if ok then\n" - " local id2name = {}\n" - " return function(cdata, name)\n" - " local id\n" - " if jit then\n" - " id = tonumber(ffi.typeof(cdata))\n" - " else\n" - " id = tostring(ffi.typeof(cdata))\n" - " end\n" - " if id then\n" - " if name then\n" - " id2name[id] = name\n" - " return name\n" - " else\n" - " return rawget(id2name, id)\n" - " end\n" - " end\n" - " return nil\n" - " end\n" - "else\n" - " return function() end\n" - "end\n"; - -static const char* luaT_cdataname(lua_State *L, int ud, const char *tname) -{ - lua_pushstring(L, "__cdataname"); - lua_rawget(L, LUA_REGISTRYINDEX); - if(lua_isnil(L,-1)) - { - lua_pop(L, 1); - - if(luaL_dostring(L, cdataname)) /* did something go wrong? 
*/ - luaL_error(L, "internal error (could not load cdataname): %s", lua_tostring(L, -1)); - - lua_pushstring(L, "__cdataname"); - lua_pushvalue(L, -2); - lua_rawset(L, LUA_REGISTRYINDEX); - } - if(!lua_isfunction(L, -1)) /* should not happen */ - luaL_error(L, "internal error (cdataname is not a function)"); - - lua_pushvalue(L, ud); - if(tname) - lua_pushstring(L, tname); - if(lua_pcall(L, (tname ? 2 : 1), 1, 0)) - luaL_error(L, "internal error (cdataname): %s", lua_tostring(L, -1)); - - tname = lua_tostring(L, -1); - lua_pop(L, 1); - - return tname; -} - -static void* CDATA_MT_KEY = &CDATA_MT_KEY; -static const char cdatamt[] = "" - "local ok, ffi = pcall(require, 'ffi')\n" - "if ok and not jit then\n" - " return ffi.debug().cdata_mt\n" - "else\n" - " return {}\n" - "end\n"; - -static int luaT_iscdata(lua_State *L, int ud) -{ - int type = lua_type(L, ud); - if(type == 10) - return 1; - if(type != LUA_TUSERDATA) - return 0; - if(!lua_getmetatable(L, ud)) - return 0; - - lua_pushlightuserdata(L, CDATA_MT_KEY); - lua_rawget(L, LUA_REGISTRYINDEX); - if (lua_isnil(L, -1)) - { - // initialize cdata metatable - lua_pop(L, 1); - if(luaL_dostring(L, cdatamt)) - luaL_error(L, "internal error (could not load cdata mt): %s", lua_tostring(L, -1)); - - lua_pushlightuserdata(L, CDATA_MT_KEY); - lua_pushvalue(L, -2); - lua_rawset(L, LUA_REGISTRYINDEX); - } - - int iscdata = lua_rawequal(L, -1, -2); - lua_pop(L, 2); - return iscdata; -} - -const char* luaT_typename(lua_State *L, int ud) -{ - if(luaT_iscdata(L, ud)) - return luaT_cdataname(L, ud, NULL); - else if(lua_getmetatable(L, ud)) - { - const char *tname = NULL; - lua_rawget(L, LUA_REGISTRYINDEX); - if(lua_isstring(L, -1)) - tname = lua_tostring(L, -1); - lua_pop(L, 1); /* the string/nil */ - return tname; - } - return NULL; -} - -void luaT_pushudata(lua_State *L, void *udata, const char *tname) -{ - if(udata) - { - void **udata_p = lua_newuserdata(L, sizeof(void*)); - *udata_p = udata; - if(!luaT_pushmetatable(L, tname)) - 
luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname); - lua_setmetatable(L, -2); - } - else - lua_pushnil(L); -} - -void *luaT_toudata(lua_State *L, int ud, const char *tname) -{ - void **p = lua_touserdata(L, ud); - if(p != NULL) /* value is a userdata? */ - { - if(!luaT_pushmetatable(L, tname)) - luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname); - - /* initialize the table we want to get the metatable on */ - /* note that we have to be careful with indices, as we just inserted stuff */ - lua_pushvalue(L, (ud < 0 ? ud - 1 : ud)); - while(lua_getmetatable(L, -1)) /* get the next metatable */ - { - lua_remove(L, -2); /* remove the previous metatable [or object, if first time] */ - if(lua_rawequal(L, -1, -2)) - { - lua_pop(L, 2); /* remove the two metatables */ - return *p; - } - } - lua_pop(L, 2); /* remove the two metatables */ - } - return NULL; -} - -int luaT_isudata(lua_State *L, int ud, const char *tname) -{ - if(luaT_toudata(L, ud, tname)) - return 1; - else - return 0; -} - -void *luaT_checkudata(lua_State *L, int ud, const char *tname) -{ - void *p = luaT_toudata(L, ud, tname); - if(!p) - luaT_typerror(L, ud, tname); - return p; -} - -void luaT_pushlong(lua_State *L, long n) -{ -#if LUA_VERSION_NUM >= 503 - /* Only push the value as an integer if it fits in lua_Integer, - or if the lua_Number representation will be even worse */ - if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) { - lua_pushinteger(L, n); - } else { - lua_pushnumber(L, (lua_Number)n); - } -#else - lua_pushnumber(L, (lua_Number)n); -#endif -} - -long luaT_checklong(lua_State *L, int idx) -{ -#if LUA_VERSION_NUM >= 503 - if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) { - return (long)luaL_checkinteger(L, idx); - } else { - return (long)luaL_checknumber(L, idx); - } -#else - return (long)luaL_checknumber(L, idx); -#endif -} - -long luaT_tolong(lua_State 
*L, int idx) -{ -#if LUA_VERSION_NUM == 503 - if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) { - return (long)lua_tointeger(L, idx); - } else { - return (long)lua_tonumber(L, idx); - } -#else - return (long)lua_tonumber(L, idx); -#endif -} - -void luaT_pushinteger(lua_State *L, ptrdiff_t n) -{ -#if LUA_VERSION_NUM >= 503 - /* Only push the value as an integer if it fits in lua_Integer, - or if the lua_Number representation will be even worse */ - if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) { - lua_pushinteger(L, n); - } else { - lua_pushnumber(L, (lua_Number)n); - } -#else - lua_pushnumber(L, (lua_Number)n); -#endif -} - -ptrdiff_t luaT_checkinteger(lua_State *L, int idx) -{ -#if LUA_VERSION_NUM >= 503 - if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) { - return (ptrdiff_t)luaL_checkinteger(L, idx); - } else { - return (ptrdiff_t)luaL_checknumber(L, idx); - } -#else - return (ptrdiff_t)luaL_checknumber(L, idx); -#endif -} - -void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) -{ - void *p; - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); - p = luaT_toudata(L, -1, tname); - if(!p) - luaL_error(L, "bad argument #%d (field %s is not a %s)", ud, field, tname); - return p; -} - -void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) -{ - void *p; - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); - - if(!lua_islightuserdata(L, -1)) - luaL_error(L, "bad argument #%d (field %s is not a light userdata)", ud, field); - - p = lua_touserdata(L, -1); - - return p; -} - -double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field) -{ - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s 
does not exist)", ud, field); - if(!lua_isnumber(L, -1)) - luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field); - return lua_tonumber(L, -1); -} - -int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) -{ - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); - if(!lua_isnumber(L, -1)) - luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field); - return (int)lua_tonumber(L, -1); -} - -const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) -{ - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); - if(!lua_isstring(L, -1)) - luaL_error(L, "bad argument #%d (field %s is not a string)", ud, field); - return lua_tostring(L, -1); -} - -int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) -{ - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); - if(!lua_isboolean(L, -1)) - luaL_error(L, "bad argument #%d (field %s is not a boolean)", ud, field); - return lua_toboolean(L, -1); -} - -void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) -{ - lua_getfield(L, ud, field); - if(lua_isnil(L, -1)) - luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); - if(!lua_istable(L, -1)) - luaL_error(L, "bad argument #%d (field %s is not a table)", ud, field); -} - -/**** type checks as in luaL ****/ -int luaT_typerror(lua_State *L, int ud, const char *tname) -{ - const char *msg; - const char *tnameud = luaT_typename(L, ud); - - if(!tnameud) - tnameud = lua_typename(L, ud); - - msg = lua_pushfstring(L, "%s expected, got %s", - tname, - (tnameud ? 
tnameud : "unknown object")); - - return luaL_argerror(L, ud, msg); -} - -int luaT_checkboolean(lua_State *L, int ud) -{ - if(!lua_isboolean(L, ud)) - luaT_typerror(L, ud, lua_typename(L, LUA_TBOOLEAN)); - return lua_toboolean(L, ud); -} - -int luaT_optboolean(lua_State *L, int ud, int def) -{ - if(lua_isnoneornil(L,ud)) - return def; - - return luaT_checkboolean(L, ud); -} - -void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) -{ - int idx = lua_gettop(L); - - luaL_checktype(L, idx, LUA_TTABLE); - lua_pushstring(L, name); - lua_rawget(L, idx); - - if(lua_isnil(L, -1)) - { - lua_pop(L, 1); - lua_pushstring(L, name); - lua_newtable(L); - lua_rawset(L, idx); - - lua_pushstring(L, name); - lua_rawget(L, idx); - } - - luaT_setfuncs(L, methods, 0); - lua_pop(L, 1); -} - - -/* returns the name of the class itself (sans nesting) */ -const char* luaT_classrootname(const char *tname) -{ - int idx; - int sz = strlen(tname); - - for(idx = sz-1; idx >= 0 ; idx--) - { - if(tname[idx] == '.') - return tname+idx+1; - } - return tname; -} - -/* parent_name must be a buffer at least as big as tname. - * If class has a parent, returns true; and, sets - * parent name to that of full parent hierarchy (e.g. - * given class `A.b.c`, sets parent_name to `A.b`) - */ -int luaT_fullparentname(const char *tname, char *parent_name) -{ - int sz = strlen(tname); - int idx; - for(idx = sz-1; idx > 0 ; idx--) - if(tname[idx] == '.' || tname[idx] == '\0') break; - - if (idx > 0) strncpy(parent_name, tname, idx); - parent_name[idx] = '\0'; - return tname[idx] == '.'; -} - -/* alias for ensuring backwards compatibility; - * use of luaT_fullparentname is preferred. - */ -int luaT_classmodulename(const char *tname, char *parent_name) -{ - return luaT_fullparentname(tname, parent_name); -} - -/* parent_name must be a buffer at least as big as tname. - * If class has a parent, returns true; and, sets - * parent name to that of outermost parent (e.g. 
- * given class `A.b.c`, sets parent_name to `A`) - */ -int luaT_outerparentname(const char *tname, char *parent_name) -{ - char chars[] = {'.', '\0'}; - size_t idx; - idx = strcspn(tname, chars); - strncpy(parent_name, tname, idx); - parent_name[idx] = '\0'; - return tname[idx] == '.'; -} - -/* parent_name must be a buffer at least as big as tname. - * If class has a parent, returns true; and, sets parent - * name to that of innermost parent (e.g. given class - * `A.b.c`, sets parent_name to `b`). In the comments - * below, the inner parent name is abbreviated as IPN. - */ -int luaT_innerparentname(const char *tname, char *parent_name) -{ - int sz = strlen(tname); - int tail, head; - for(tail = sz-1; tail >= 0 ; tail--) // tail points to - if(tname[tail] == '.') break; // just past IPN - - if (tail == 0) return 0; - - for(head = tail-1; head >= 0; head--) // head points to - if(tname[head] == '.') break; // just before IPN - - head += 1; // update head to start of IPN - tail -= head; // update tail to strlen(IPN) - strncpy(parent_name, tname+head, tail); - parent_name[tail] = '\0'; - return 1; -} - -/* Method for pushing a class's immediate parent to the - * stack (e.g. given class `A.b.c`, pushes `b` to the stack) - */ -void luaT_getinnerparent(lua_State *L, const char *tname) -{ - /* Local variables */ - char term[256]; - char chars[] = {'.', '\0'}; - const char *tname_full = tname; // used for error case - - /* Get outermost table from Lua */ - int n = strcspn(tname, chars); - strncpy(term, tname, n); - term[n] = '\0'; - lua_getglobal(L, term); - tname += n + 1; - - /* Traverse hierarchy down to last table*/ - n = strcspn(tname, chars); - while(n < strlen(tname)) - { - /* Check that current parent is a table (i.e. 
a module) */ - if(!lua_istable(L, -1)){ - strncpy(term, tname_full, tname - tname_full - 1); - term[tname - tname_full] = '\0'; - luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname_full, term); - } - strncpy(term, tname, n); - term[n] = '\0'; - lua_getfield(L, -1, term); - lua_remove(L, -2); - tname += n + 1; - n = strcspn(tname, chars); // prepare for next - } - - /* Check that resulting parent is a table (i.e. a module) */ - if(!lua_istable(L, -1)){ - strncpy(term, tname_full, tname - tname_full - 1); - term[tname - tname_full] = '\0'; - luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname_full, term); - } -} - - -int luaT_lua_newmetatable(lua_State *L) -{ - /* Local Variables */ - const char* tname = luaL_checkstring(L, 1); - char parent_name[256]; - int is_in_module = 0; - - /* Argument Checking */ - lua_settop(L, 6); - luaL_argcheck(L, lua_isnoneornil(L, 2) || lua_isstring(L, 2), 2, "parent class name or nil expected"); - luaL_argcheck(L, lua_isnoneornil(L, 3) || lua_isfunction(L, 3), 3, "constructor function or nil expected"); - luaL_argcheck(L, lua_isnoneornil(L, 4) || lua_isfunction(L, 4), 4, "destructor function or nil expected"); - luaL_argcheck(L, lua_isnoneornil(L, 5) || lua_isfunction(L, 5), 5, "factory function or nil expected"); - luaL_argcheck(L, lua_isnoneornil(L, 6) || lua_istable(L, 6), 6, "module table or nil expected"); - - /* Push immediate parent module to stack */ - if(lua_isnoneornil(L, 6)) { - lua_pop(L, 1); /* remove the nil */ - is_in_module = luaT_fullparentname(tname, parent_name); - if (is_in_module) - luaT_getinnerparent(L, tname); - else - lua_pushglobaltable(L); - } - - if(!lua_istable(L, -1)) - luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname, parent_name); - - /* we first create the new metaclass if we have to */ - if(!luaT_pushmetatable(L, tname)) - { - /* create the metatable */ - 
lua_newtable(L); - - /* registry[name] = metatable */ - lua_pushvalue(L, -1); - lua_setfield(L, LUA_REGISTRYINDEX, tname); - - /* registry[metatable] = tname */ - lua_pushvalue(L, -1); - lua_pushstring(L, tname); - lua_rawset(L, LUA_REGISTRYINDEX); - - /* __index handling */ - lua_pushcfunction(L, luaT_mt__index); - lua_setfield(L, -2, "__index"); - - /* __newindex handling */ - lua_pushcfunction(L, luaT_mt__newindex); - lua_setfield(L, -2, "__newindex"); - - /* __typename contains the typename */ - lua_pushstring(L, tname); - lua_setfield(L, -2, "__typename"); - - /* __metatable is self */ - lua_pushvalue(L, -1); - lua_setfield(L, -2, "__metatable"); - - /* by default, __version equals 1 */ - lua_pushnumber(L, 1); - lua_setfield(L, -2, "__version"); - - /* assign default operator functions */ - lua_pushcfunction(L, luaT_mt__tostring); - lua_setfield(L, -2, "__tostring"); - - lua_pushcfunction(L, luaT_mt__add); - lua_setfield(L, -2, "__add"); - - lua_pushcfunction(L, luaT_mt__sub); - lua_setfield(L, -2, "__sub"); - - lua_pushcfunction(L, luaT_mt__mul); - lua_setfield(L, -2, "__mul"); - - lua_pushcfunction(L, luaT_mt__div); - lua_setfield(L, -2, "__div"); - - lua_pushcfunction(L, luaT_mt__mod); - lua_setfield(L, -2, "__mod"); - - lua_pushcfunction(L, luaT_mt__pow); - lua_setfield(L, -2, "__pow"); - - lua_pushcfunction(L, luaT_mt__unm); - lua_setfield(L, -2, "__unm"); - - lua_pushcfunction(L, luaT_mt__concat); - lua_setfield(L, -2, "__concat"); - - lua_pushcfunction(L, luaT_mt__len); - lua_setfield(L, -2, "__len"); - - lua_pushcfunction(L, luaT_mt__eq); - lua_setfield(L, -2, "__eq"); - - lua_pushcfunction(L, luaT_mt__lt); - lua_setfield(L, -2, "__lt"); - - lua_pushcfunction(L, luaT_mt__le); - lua_setfield(L, -2, "__le"); - - lua_pushcfunction(L, luaT_mt__call); - lua_setfield(L, -2, "__call"); - } - - /* we assign the parent class if necessary */ - if(!lua_isnoneornil(L, 2)) - { - if(lua_getmetatable(L, -1)) - luaL_error(L, "class %s has been already assigned a 
parent class\n", tname); - else - { - const char* parent_tname = luaL_checkstring(L, 2); - if(!luaT_pushmetatable(L, parent_tname)) - luaL_error(L, "bad argument #2 (invalid parent class name %s)", parent_tname); - lua_setmetatable(L, -2); - } - } - - /* register the destructor function */ - if(!lua_isnoneornil(L, 4)) - { - /* does it exists already? */ - lua_pushstring(L, "__gc"); - lua_rawget(L, -2); - - if(lua_isnil(L, -1)) - { - lua_pop(L, 1); /* pop nil */ - lua_pushstring(L, "__gc"); - lua_pushvalue(L, 4); - lua_rawset(L, -3); - } - else - luaL_error(L, "%s has been already assigned a destructor", tname); - } - - /* register the factory function */ - if(!lua_isnoneornil(L, 5)) - { - /* does it exists already? */ - lua_pushstring(L, "__factory"); - lua_rawget(L, -2); - - if(lua_isnil(L, -1)) - { - lua_pop(L, 1); /* pop nil */ - lua_pushstring(L, "__factory"); - lua_pushvalue(L, 5); - lua_rawset(L, -3); - } - else - luaL_error(L, "%s has been already assigned a factory", tname); - } - - /******** Constructor table and metatable ********/ - lua_pushstring(L, "__constructor"); - lua_rawget(L, -2); - if(lua_isnil(L, -1)) - { - lua_pop(L, 1); /* pop nil */ - lua_newtable(L); /* fancy table */ - lua_newtable(L); /* fancy metatable */ - - lua_pushvalue(L, -3); /* metatable */ - lua_setfield(L, -2, "__index"); /* so we can get the methods */ - - lua_pushcfunction(L, luaT_cmt__newindex); - lua_setfield(L, -2, "__newindex"); /* so we add new methods */ - - lua_pushcfunction(L, luaT_cmt__call); - lua_setfield(L, -2, "__call"); /* so we can create, we are here for only that */ - - lua_pushvalue(L, -3); - lua_setfield(L, -2, "__metatable"); /* redirect to metatable with methods */ - - lua_setmetatable(L, -2); /* constructor metatable is ... 
this fancy metatable */ - - /* set metatable[__constructor] = constructor-metatable */ - lua_pushstring(L, "__constructor"); - lua_pushvalue(L, -2); - lua_rawset(L, -4); - } - - /* register the constructor function */ - if(!lua_isnoneornil(L, 3)) - { - /* get constructor metatable */ - lua_getmetatable(L, -1); - - /* does it exists already? */ - lua_pushstring(L, "__new"); - lua_rawget(L, -2); - - if(lua_isnil(L, -1)) - { - lua_pop(L, 1); /* pop nil */ - lua_pushstring(L, "__new"); - lua_pushvalue(L, 3); - lua_rawset(L, -3); - - /* set "new" in the metatable too */ - lua_pushstring(L, "new"); - lua_pushvalue(L, 3); - lua_rawset(L, -5); - } - else - luaL_error(L, "%s has been already assigned a constructor", tname); - - /* pop constructor metatable */ - lua_pop(L, 1); - } - - /* module.name = constructor metatable */ - lua_setfield(L, 6, luaT_classrootname(tname)); - - return 1; /* returns the metatable */ -} - -/* Lua only utility functions */ - -/* add any custom type, provided the object has a metatable */ -int luaT_lua_metatype(lua_State *L) -{ - if( (lua_gettop(L) != 2) && (lua_gettop(L) != 3) ) - luaL_error(L, "expecting: string table [ctype]"); - - luaL_checkstring(L, 1); - luaL_checktype(L, 2, LUA_TTABLE); - - if(lua_gettop(L) == 3) - { - if(!luaT_cdataname(L, 3, lua_tostring(L, 1))) - luaL_error(L, "could not register cdata type -- missing ffi library?"); - } - - /* registry[name] = metatable */ - lua_pushvalue(L, 1); - lua_pushvalue(L, 2); - lua_rawset(L, LUA_REGISTRYINDEX); - - /* registry[metatable] = tname */ - lua_pushvalue(L, 2); - lua_pushvalue(L, 1); - lua_rawset(L, LUA_REGISTRYINDEX); - - return 0; -} - -/* return a userdata from a C pointer */ -/* you are better to know what you are doing */ -int luaT_lua_pushudata(lua_State *L) -{ - void *udata = NULL; - const char *tname = luaL_checkstring(L, 2); - - if(lua_type(L, 1) == 10) - udata = *((void**)lua_topointer(L, 1)); - else if(luaT_iscdata(L, 1)) - udata = ((void**)lua_topointer(L, 1))[4]; - else 
if(lua_isnumber(L, 1)) - udata = (void*)(uintptr_t)lua_tonumber(L, 1); - else - luaL_argerror(L, 1, "expecting number or cdata"); - - luaT_pushudata(L, udata, tname); - - return 1; -} - -int luaT_lua_factory(lua_State *L) -{ - const char* tname = luaL_checkstring(L, 1); - if(luaT_pushmetatable(L, tname) && !lua_isnil(L, -1)) - { - lua_pushstring(L, "__factory"); - lua_rawget(L, -2); - } - else - { - lua_pushnil(L); - } - return 1; -} - -int luaT_lua_getconstructortable(lua_State *L) -{ - const char* tname = luaL_checkstring(L, 1); - if(luaT_pushmetatable(L, tname)) - { - lua_pushstring(L, "__constructor"); - lua_rawget(L, -2); - return 1; - } - return 0; -} - - -int luaT_lua_typename(lua_State *L) -{ - const char* tname = NULL; - luaL_checkany(L, 1); - if((tname = luaT_typename(L, 1))) - { - lua_pushstring(L, tname); - return 1; - } - return 0; -} - -int luaT_lua_isequal(lua_State *L) -{ - if(lua_isuserdata(L, 1) && lua_isuserdata(L, 2)) - { - void **u1, **u2; - luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected"); - luaL_argcheck(L, luaT_typename(L, 2), 2, "Torch object expected"); - - u1 = lua_touserdata(L, 1); - u2 = lua_touserdata(L, 2); - if(*u1 == *u2) - lua_pushboolean(L, 1); - else - lua_pushboolean(L, 0); - } - else if(lua_istable(L, 1) && lua_istable(L, 2)) - lua_pushboolean(L, lua_rawequal(L, 1, 2)); - else - lua_pushboolean(L, 0); - return 1; -} - -static void luaT_pushpointer(lua_State *L, const void *ptr) -{ -#if LUA_VERSION_NUM >= 503 - // this assumes that lua_Integer is a ptrdiff_t - if (sizeof(void *) > sizeof(lua_Integer)) - luaL_error(L, "Pointer value can't be represented as a Lua integer (an overflow would occur)"); - lua_pushinteger(L, (uintptr_t)(ptr)); -#else - // 2^53 - this assumes that lua_Number is a double - if ((uintptr_t)ptr > 9007199254740992LLU) - luaL_error(L, "Pointer value can't be represented as a Lua number (an overflow would occur)"); - lua_pushnumber(L, (uintptr_t)(ptr)); -#endif -} - -int 
luaT_lua_pointer(lua_State *L) -{ - if(lua_type(L, 1) == 10) /* luajit cdata */ - { - /* we want the pointer holded by cdata */ - /* not the pointer on the cdata object */ - const void* ptr = *((void**)lua_topointer(L, 1)); - luaT_pushpointer(L, ptr); - return 1; - } - else if (luaT_iscdata(L, 1)) /* luaffi cdata */ - { - void** ptr = (void**)lua_touserdata(L, 1); - luaT_pushpointer(L, ptr[4]); - return 1; - } - else if(lua_isuserdata(L, 1)) - { - void **ptr; - luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected"); - ptr = lua_touserdata(L, 1); - luaT_pushpointer(L, *ptr); - return 1; - } - else if(lua_istable(L, 1) || lua_isthread(L, 1) || lua_isfunction(L, 1)) - { - const void* ptr = lua_topointer(L, 1); - luaT_pushpointer(L, ptr); - return 1; - } - else if(lua_isstring(L, 1)) - { - const char* ptr = lua_tostring(L, 1); - luaT_pushpointer(L, ptr); - return 1; - } - else - luaL_error(L, "Torch object, table, thread, cdata or function expected"); - - return 0; -} - -int luaT_lua_setenv(lua_State *L) -{ - if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1)) - luaL_typerror(L, 1, "function or userdata"); - luaL_checktype(L, 2, LUA_TTABLE); - lua_setuservalue(L, 1); - return 0; -} - -int luaT_lua_getenv(lua_State *L) -{ - if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1)) - luaL_typerror(L, 1, "function or userdata"); - lua_getuservalue(L, 1); - if (lua_isnil(L, -1)) - lua_newtable(L); - return 1; -} - -int luaT_lua_getmetatable(lua_State *L) -{ - const char *tname = luaL_checkstring(L, 1); - if(luaT_pushmetatable(L, tname)) - return 1; - return 0; -} - -int luaT_lua_version(lua_State *L) -{ - luaL_checkany(L, 1); - - if(luaT_iscdata(L, 1)) - { - const char *tname = luaT_cdataname(L, 1, NULL); - if(tname) - { - luaT_pushmetatable(L, tname); - lua_pushstring(L, "__version"); - lua_rawget(L, -2); - return 1; - } - return 0; - } - else if(lua_getmetatable(L, 1)) - { - lua_pushstring(L, "__version"); - lua_rawget(L, -2); - return 1; - } - return 0; -} - -int 
luaT_lua_setmetatable(lua_State *L) -{ - const char *tname = luaL_checkstring(L, 2); - luaL_checktype(L, 1, LUA_TTABLE); - - if(!luaT_pushmetatable(L, tname)) - luaL_error(L, "unknown typename %s\n", tname); - lua_setmetatable(L, 1); - - return 1; -} - -/* metatable operator methods */ -static int luaT_mt__index(lua_State *L) -{ - if(!lua_getmetatable(L, 1)) - luaL_error(L, "critical internal indexing error: no metatable found"); - - if(!lua_istable(L, -1)) - luaL_error(L, "critical internal indexing error: not a metatable"); - - /* test for __index__ method first */ - lua_getfield(L, -1, "__index__"); - if(!lua_isnil(L, -1)) - { - int result; - - if(!lua_isfunction(L, -1)) - luaL_error(L, "critical internal indexing error: __index__ is not a function"); - - lua_pushvalue(L, 1); - lua_pushvalue(L, 2); - - lua_call(L, 2, LUA_MULTRET); /* DEBUG: risque: faut vraiment retourner 1 ou 2 valeurs... */ - - result = lua_toboolean(L, -1); - lua_pop(L, 1); - - if(result) - return 1; - - /* on the stack: 1. the object 2. the value 3. 
the metatable */ - /* apparently, __index wants only one element returned */ - /* return lua_gettop(L)-3; */ - - } - else - lua_pop(L, 1); /* remove nil __index__ on the stack */ - - lua_pushvalue(L, 2); - lua_gettable(L, -2); - - return 1; -} - -static int luaT_mt__newindex(lua_State *L) -{ - if(!lua_getmetatable(L, 1)) - luaL_error(L, "critical internal indexing error: no metatable found"); - - if(!lua_istable(L, -1)) - luaL_error(L, "critical internal indexing error: not a metatable"); - - /* test for __newindex__ method first */ - lua_getfield(L, -1, "__newindex__"); - if(!lua_isnil(L, -1)) - { - int result; - - if(!lua_isfunction(L, -1)) - luaL_error(L, "critical internal indexing error: __newindex__ is not a function"); - - lua_pushvalue(L, 1); - lua_pushvalue(L, 2); - lua_pushvalue(L, 3); - - lua_call(L, 3, 1); /* DEBUG: risque: faut vraiment retourner qqch */ - - result = lua_toboolean(L, -1); - lua_pop(L, 1); - - if(result) - return 0; - } - else - lua_pop(L, 1); /* remove nil __newindex__ on the stack */ - - lua_pop(L, 1); /* pop the metatable */ - if(lua_istable(L, 1)) - lua_rawset(L, 1); - else - luaL_error(L, "the class %s cannot be indexed", luaT_typename(L, 1)); - - return 0; -} - - -#define MT_UNI_OPERATOR_GET_HANDLER(NAME) \ - if(!lua_getmetatable(L, 1)) \ - luaL_error(L, "internal error in __" #NAME ": no metatable"); - -#define MT_BIN_OPERATOR_GET_HANDLER(NAME) \ - if(!lua_getmetatable(L, 1) && !lua_getmetatable(L,2) ) \ - luaL_error(L, "internal error in __" #NAME \ - ": no metatable in both operands"); - -#define MT_DECLARE_OPERATOR_BODY(NAME, NIL_BEHAVIOR) \ - \ - lua_getfield(L, -1, "__" #NAME "__"); \ - if(lua_isnil(L, -1)) \ - { \ - NIL_BEHAVIOR; \ - } \ - else \ - { \ - if(lua_isfunction(L, -1)) \ - { \ - lua_insert(L, 1); /* insert function */ \ - lua_pop(L, 1); /* remove metatable */ \ - lua_call(L, lua_gettop(L)-1, LUA_MULTRET); \ - /* we return the result of the call */ \ - return lua_gettop(L); \ - } \ - /* we return the thing the 
user left in __tostring__ */ \ - } \ - return 0; \ - -/* note: check dans metatable pour ca, donc necessaire */ -#define MT_DECLARE_OPERATOR(NAME, NIL_BEHAVIOR) \ - int luaT_mt__##NAME(lua_State *L) \ - { \ - MT_UNI_OPERATOR_GET_HANDLER(NAME) \ - MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR) \ - } - -#define MT_DECLARE_BIN_OPERATOR(NAME, NIL_BEHAVIOR) \ - int luaT_mt__##NAME(lua_State *L) \ - { \ - MT_BIN_OPERATOR_GET_HANDLER(NAME) \ - MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR) \ - } - - -#define BIN_OPERATOR_ERROR(NAME) \ - luaL_error(L, "both %s and %s have no " #NAME " operator", \ - luaT_typename(L, 1), luaT_typename(L,2)) - -MT_DECLARE_BIN_OPERATOR(add, BIN_OPERATOR_ERROR(addition) ) -MT_DECLARE_BIN_OPERATOR(sub, BIN_OPERATOR_ERROR(substraction) ) -MT_DECLARE_BIN_OPERATOR(mul, BIN_OPERATOR_ERROR(multiplication) ) -MT_DECLARE_BIN_OPERATOR(div, BIN_OPERATOR_ERROR(division) ) -MT_DECLARE_BIN_OPERATOR(mod, BIN_OPERATOR_ERROR(modulo) ) -MT_DECLARE_BIN_OPERATOR(pow, BIN_OPERATOR_ERROR(power) ) -MT_DECLARE_BIN_OPERATOR(concat, BIN_OPERATOR_ERROR(concat) ) -MT_DECLARE_BIN_OPERATOR(eq, - lua_settop(L, 2); - lua_pushcfunction(L, luaT_lua_isequal); - lua_insert(L, 1); - lua_call(L, 2, 1); - return 1;) -MT_DECLARE_BIN_OPERATOR(lt, BIN_OPERATOR_ERROR(less-than) ) -MT_DECLARE_BIN_OPERATOR(le, BIN_OPERATOR_ERROR(less-equal) ) - -MT_DECLARE_OPERATOR(tostring, - lua_pushstring(L, luaT_typename(L, 1)); - return 1;) -MT_DECLARE_OPERATOR(call, luaL_error(L, "%s has no call operator", luaT_typename(L, 1))) -MT_DECLARE_OPERATOR(unm, luaL_error(L, "%s has no negation operator", luaT_typename(L, 1))) -MT_DECLARE_OPERATOR(len, luaL_error(L, "%s has no length operator", luaT_typename(L, 1))) - - -/* constructor metatable methods */ -int luaT_cmt__call(lua_State *L) -{ - if(!lua_istable(L, 1)) - luaL_error(L, "internal error in __call: not a constructor table"); - - if(!lua_getmetatable(L, 1)) - luaL_error(L, "internal error in __call: no metatable available"); - - lua_pushstring(L, 
"__new"); - lua_rawget(L, -2); - - if(lua_isnil(L, -1)) - luaL_error(L, "no constructor available"); - - lua_remove(L, 1); /* remove constructor atable */ - lua_insert(L, 1); /* insert constructor */ - lua_pop(L, 1); /* remove fancy metatable */ - - lua_call(L, lua_gettop(L)-1, LUA_MULTRET); - return lua_gettop(L); -} - -int luaT_cmt__newindex(lua_State *L) -{ - if(!lua_istable(L, 1)) - luaL_error(L, "internal error in __newindex: not a constructor table"); - - if(!lua_getmetatable(L, 1)) - luaL_error(L, "internal error in __newindex: no metatable available"); - - lua_pushstring(L, "__metatable"); - lua_rawget(L, -2); - - if(!lua_istable(L, -1)) - luaL_error(L, "internal error in __newindex: no metaclass available"); - - lua_insert(L, 2); - lua_pop(L, 1); /* remove the metatable over the constructor table */ - - lua_rawset(L, -3); - - return 0; -} - -/******************** deprecated functions ********************/ -int luaT_pushmetaclass(lua_State *L, const char *tname) -{ - return luaT_pushmetatable(L, tname); -} - -const char* luaT_id(lua_State *L, int ud) -{ - return luaT_typename(L, ud); -} - -const char* luaT_id2typename(lua_State *L, const char *id) -{ - return id; -} - -const char* luaT_typename2id(lua_State *L, const char *tname) -{ - return luaT_typenameid(L, tname); -} - -int luaT_getmetaclass(lua_State *L, int index) -{ - return lua_getmetatable(L, index); -} - -const char* luaT_checktypename2id(lua_State *L, const char *tname) -{ - const char* id = luaT_typenameid(L, tname); - if(!id) - luaL_error(L, "unknown class <%s>", tname); - return id; -} - -void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id) -{ - luaT_registeratname(L, methods, id); -} - -/**************************************************************/ diff --git a/contrib/lua-torch/torch7/lib/luaT/luaT.h b/contrib/lua-torch/torch7/lib/luaT/luaT.h deleted file mode 100644 index 2479a1dc1..000000000 --- a/contrib/lua-torch/torch7/lib/luaT/luaT.h +++ /dev/null @@ 
-1,135 +0,0 @@ -#ifndef LUAT_UTILS_INC -#define LUAT_UTILS_INC - -#ifdef __cplusplus -extern "C" { -#endif -#include <lua.h> -#include <lauxlib.h> -#ifdef __cplusplus -} -#endif - -#ifndef LUA_EXTERNC -# ifdef __cplusplus -# define LUA_EXTERNC extern "C" -# else -# define LUA_EXTERNC extern -# endif -#endif - -#if (defined(_MSC_VER) || defined(__MINGW32__)) -# define DLL_EXPORT __declspec(dllexport) -# define DLL_IMPORT __declspec(dllimport) -# ifdef luaT_EXPORTS -# define LUAT_API LUA_EXTERNC DLL_EXPORT -# else -# define LUAT_API LUA_EXTERNC DLL_IMPORT -# endif -#else -# define DLL_EXPORT -# define DLL_IMPORT -# define LUAT_API LUA_EXTERNC -#endif - -#if LUA_VERSION_NUM == 501 -# define lua_pushglobaltable(L) lua_pushvalue(L, LUA_GLOBALSINDEX) -# define lua_setuservalue lua_setfenv -# define lua_getuservalue lua_getfenv -#else -# define lua_objlen lua_rawlen -static int luaL_typerror(lua_State *L, int narg, const char *tname) -{ - return luaL_error(L, "%s expected, got %s", tname, luaL_typename(L, narg)); -} -#endif - - -/* C functions */ - -LUAT_API void* luaT_alloc(lua_State *L, ptrdiff_t size); -LUAT_API void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size); -LUAT_API void luaT_free(lua_State *L, void *ptr); - -LUAT_API void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup); - -LUAT_API const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname, - lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx); - -LUAT_API const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, - lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory); - -LUAT_API int luaT_pushmetatable(lua_State *L, const char *tname); - -LUAT_API const char* luaT_typenameid(lua_State *L, const char *tname); -LUAT_API const char* luaT_typename(lua_State *L, int ud); - -LUAT_API void luaT_pushudata(lua_State *L, void *udata, const char *tname); -LUAT_API void 
*luaT_toudata(lua_State *L, int ud, const char *tname); -LUAT_API int luaT_isudata(lua_State *L, int ud, const char *tname); -LUAT_API void *luaT_checkudata(lua_State *L, int ud, const char *tname); - -LUAT_API void luaT_pushlong(lua_State *L, long n); -LUAT_API long luaT_checklong(lua_State *L, int idx); -LUAT_API long luaT_tolong(lua_State *L, int idx); - -LUAT_API void luaT_pushinteger(lua_State *L, ptrdiff_t n); -LUAT_API ptrdiff_t luaT_checkinteger(lua_State *L, int idx); - -LUAT_API void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname); -LUAT_API void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field); -LUAT_API double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field); -LUAT_API int luaT_getfieldcheckint(lua_State *L, int ud, const char *field); -LUAT_API const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field); -LUAT_API int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field); -LUAT_API void luaT_getfieldchecktable(lua_State *L, int ud, const char *field); - -LUAT_API int luaT_typerror(lua_State *L, int ud, const char *tname); - -LUAT_API int luaT_checkboolean(lua_State *L, int ud); -LUAT_API int luaT_optboolean(lua_State *L, int ud, int def); - -LUAT_API void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name); - -/* utility functions */ -LUAT_API const char *luaT_classrootname(const char *tname); -LUAT_API int luaT_classmodulename(const char *tname, char *module_name); - -/* debug */ -LUAT_API void luaT_stackdump(lua_State *L); - -/* Lua functions */ -LUAT_API int luaT_lua_newmetatable(lua_State *L); -LUAT_API int luaT_lua_factory(lua_State *L); -LUAT_API int luaT_lua_getconstructortable(lua_State *L); -LUAT_API int luaT_lua_typename(lua_State *L); -LUAT_API int luaT_lua_isequal(lua_State *L); -LUAT_API int luaT_lua_pointer(lua_State *L); -LUAT_API int luaT_lua_setenv(lua_State *L); -LUAT_API int 
luaT_lua_getenv(lua_State *L); -LUAT_API int luaT_lua_getmetatable(lua_State *L); -LUAT_API int luaT_lua_version(lua_State *L); -LUAT_API int luaT_lua_setmetatable(lua_State *L); -LUAT_API int luaT_lua_metatype(lua_State *L); -LUAT_API int luaT_lua_pushudata(lua_State *L); - -/* deprecated functions */ -/* ids have been replaced by string names to identify classes */ -/* comments show what function (that you should use) they call now */ -#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) -#define LUAT_DEPRECATED __attribute__((__deprecated__)) -#elif (defined(_MSC_VER) || defined(__MINGW32__)) -#define LUAT_DEPRECATED __declspec(deprecated) -#else -#define LUAT_DEPRECATED -#endif - -LUAT_API LUAT_DEPRECATED int luaT_pushmetaclass(lua_State *L, const char *tname); /* same as luaT_pushmetatable */ -LUAT_API LUAT_DEPRECATED const char* luaT_id(lua_State *L, int ud); /* same as luaT_typename */ -LUAT_API LUAT_DEPRECATED const char* luaT_id2typename(lua_State *L, const char *id); /* same as luaT_typenameid */ -LUAT_API LUAT_DEPRECATED const char* luaT_typename2id(lua_State *L, const char*); /* same as luaT_typenameid */ -LUAT_API LUAT_DEPRECATED int luaT_getmetaclass(lua_State *L, int index); /* same as luaT_getmetatable */ -LUAT_API LUAT_DEPRECATED const char* luaT_checktypename2id(lua_State *L, const char *tname); /* same as luaT_typenameid */ -LUAT_API LUAT_DEPRECATED void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id); /* same as luaT_registeratname */ - -#endif diff --git a/contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in b/contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in deleted file mode 100644 index bfb20b87a..000000000 --- a/contrib/lua-torch/torch7/lib/luaT/luaTConfig.cmake.in +++ /dev/null @@ -1,9 +0,0 @@ -# Find the luaT includes and library -# -# LUAT_INCLUDE_DIR -- where to find the includes -# LUAT_LIBRARIES -- list of libraries to link against -# LUAT_FOUND -- set to 1 if found - -SET(LUAT_FOUND 1) 
-SET(LUAT_INCLUDE_DIR "@LUAT_INCLUDE_DIR@") -SET(LUAT_LIBRARIES "@LUAT_LIBRARIES@") |