我SSE
用来实现矩阵乘法,我Stack Overflow Exception
在运行代码时得到了.而且stack overflow exception
是在chktsk.asm
; Find next lower page and probe cs20: sub eax, _PAGESIZE_ ; decrease by PAGESIZE test dword ptr [eax],eax ; probe page. jmp short cs10 _chkstk endp end
很难发现哪里出错了,我的代码是:
main.cpp
#include "sse_matrix.h" int main(int argc, char* argv[]) { float left[size] = {0}; float right[size] = {0}; float result[size] = {0}; // initialize value for (int i = 0; i < dim; i ++) { for (int j = 0; j < dim; j ++) { left[i*dim + j] = j; right[i*dim + j] = j; } } //calculate the result SSE_Matrix_Multiply(left, right, result); /*for (int i = 0; i < dim; i ++) { for (int j = 0; j < dim; j ++) { cout << result[i * dim + j] << " "; } cout << endl; }*/ system("pause"); return 0; }
包含文件:
#ifndef __SSE_MATRIX_H #define __SSE_MATRIX_H #includeusing std::cin; using std::cout; using std::endl; #define dim 512 #define size dim * dim struct Matrix_Info { float *A; int ax, ay; float *B; int bx, by; float *C; int cx, cy; int m; int n; }; void Transpose_Matrix_SSE(float * matrix) { __m128 row1 = _mm_loadu_ps(&matrix[0*4]); __m128 row2 = _mm_loadu_ps(&matrix[1*4]); __m128 row3 = _mm_loadu_ps(&matrix[2*4]); __m128 row4 = _mm_loadu_ps(&matrix[3*4]); _MM_TRANSPOSE4_PS(row1, row2, row3, row4); _mm_storeu_ps(&matrix[0*4], row1); _mm_storeu_ps(&matrix[1*4], row2); _mm_storeu_ps(&matrix[2*4], row3); _mm_storeu_ps(&matrix[3*4], row4); } float * Shuffle_Matrix_Multiply(float * left, float * right) { __m128 _t1, _t2, _sum; _sum = _mm_setzero_ps(); // set all value of _sum to zero float _result[size] = {0}; float _res[4] = {0}; for (int i = 0; i < 4; i ++) { for (int j = 0; j < 4; j ++) { _t1 = _mm_loadu_ps(left + i * 4); _t2 = _mm_loadu_ps(right + j * 4); _sum = _mm_mul_ps(_t1, _t2); _mm_storeu_ps(_res, _sum); _result[i * 4 + j] = _res[0] + _res[1] + _res[2] + _res[3]; } } return _result; } float * SSE_4_Matrix(struct Matrix_Info * my_info) { int m = my_info->m; int n = my_info->n; int ax = my_info->ax; int ay = my_info->ay; int bx = my_info->bx; int by = my_info->by; //1. split Matrix A and Matrix B float * _a = new float[16]; float * _b = new float[16]; for (int i = 0; i < m; i ++) { for (int j = 0; j < m; j ++) { _a[i*m + j] = *(my_info->A + (i + ax) * n + j + ay); _b[i*m + j] = *(my_info->B + (i + bx) * n + j + by); } } //2. transpose Matrix B Transpose_Matrix_SSE(_b); //3. calculate result and return a float pointer return Shuffle_Matrix_Multiply(_a, _b); } int Matrix_Multiply(struct Matrix_Info * my_info) { int m = my_info->m; int n = my_info->n; int cx = my_info->cx; int cy = my_info->cy; for (int i = 0; i < m; i ++) { for (int j = 0; j < m; j ++) { *(my_info->C + (i + cx) * n + j + cy) += SSE_4_Matrix(my_info)[i*m + j]; } } return 0; } void SSE_Matrix_Multiply(float *left, float *right, float *result) { struct Matrix_Info my_info; my_info.A = left; my_info.B = right; my_info.C = result; my_info.n = dim; my_info.m = 4; // Matrix A row:i, column:j for (int i = 0; i < dim; i += 4) { for (int j = 0; j < dim; j += 4) { // Matrix B row:j column:k for (int k = 0; k < dim; k += 4) { my_info.ax = i; my_info.ay = j; my_info.bx = j; my_info.by = k; my_info.cx = i; my_info.cy = k; Matrix_Multiply(&my_info); } } } } #endif
当dim
包含文件中的(矩阵的维度)时4, 8, 16, 32, 64, 128 and 256
,Stack Overflow Exception
将不会发生.但是,当dim
512或更大时,Stack Overflow Exception
将发生.
我的操作系统是Windows 10
和IDE
现在的Visual Studio 2012
.
让我感到困惑的是,当我在中断main.cpp
时#include "sse_matrix.h"
,当它运行时,它会发生Stack Overflow Exception
.我认为我的代码中没有语法错误或逻辑错误.但我不知道如何解决它.
你#define
荷兰国际集团size
是dim * dim
你所后#define dim 512
,所以size
是262,144.然后你把它float _result[size]
放在堆栈上.大多数堆栈都小于262,144*sizeof(float)
.
正如@πάνταῥεῖ所说,你可能想要更像的东西:
std::vector_result(size, 0);
这样你的262,144*sizeof(float)
内存块被放在堆上并为你管理(这比自己分配和解除分配内存要好得多 - 甚至比智能指针更好,因为它std::vector
会适应你的大小变化).
正如@ Basile Starynkevitch指出的那样,你不需要:
#define size dim * dim
因为该dim * dim
文本将被替换,size
并且很容易导致语法错误或更糟.
更好的是:
#define size (dim * dim)
更好的是:
constexpr size_t dim = 512; constexpr size_t size = dim * dim;