
Implementation of a Parallel Matrix Multiplication Algorithm




Below is the source code of a program that implements parallel matrix multiplication based on Fox's algorithm:
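As a brief orientation before the listing (this summary is inferred from the code itself): the processes are arranged into a square grid of size q = GridSize, the matrices are partitioned into square blocks, and the process with grid coordinates (i, j) initially stores the blocks A_{ij} and B_{ij} and accumulates C_{ij}, starting from zero. At each iteration t = 0, 1, ..., q-1 every process computes

\[
k = (i + t) \bmod q, \qquad C_{ij} \leftarrow C_{ij} + A_{ik} B_{kj},
\]

where the block A_{ik} is broadcast along grid row i by the pivot process (i, k), and afterwards the blocks of B are shifted cyclically one position up the grid columns, so that at the next iteration process (i, j) holds B_{(i+t+1) \bmod q, j}. After q iterations each C_{ij} equals the corresponding block of the product AB.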

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>

int ProcNum = 0;     // Number of available processes
int ProcRank = 0;    // Rank of current process
int GridSize;        // Size of virtual processor grid
int GridCoords[2];   // Coordinates of current processor in grid
MPI_Comm GridComm;   // Grid communicator
MPI_Comm ColComm;    // Column communicator
MPI_Comm RowComm;    // Row communicator

// Function for simple initialization of matrix elements
void DummyDataInitialization (double* pAMatrix, double* pBMatrix, int Size) {
  int i, j;  // Loop variables
  for (i=0; i<Size; i++)
    for (j=0; j<Size; j++) {
      pAMatrix[i*Size+j] = 1;
      pBMatrix[i*Size+j] = 1;
    }
}

// Function for random initialization of matrix elements
void RandomDataInitialization (double* pAMatrix, double* pBMatrix,
    int Size) {
  int i, j;  // Loop variables
  srand(unsigned(clock()));
  for (i=0; i<Size; i++)
    for (j=0; j<Size; j++) {
      pAMatrix[i*Size+j] = rand()/double(1000);
      pBMatrix[i*Size+j] = rand()/double(1000);
    }
}

// Function for formatted matrix output
void PrintMatrix (double* pMatrix, int RowCount, int ColCount) {
  int i, j;  // Loop variables
  for (i=0; i<RowCount; i++) {
    for (j=0; j<ColCount; j++)
      printf("%7.4f ", pMatrix[i*ColCount+j]);
    printf("\n");
  }
}

// Function for serial matrix multiplication
void SerialResultCalculation (double* pAMatrix, double* pBMatrix,
    double* pCMatrix, int Size) {
  int i, j, k;  // Loop variables
  for (i=0; i<Size; i++)
    for (j=0; j<Size; j++)
      for (k=0; k<Size; k++)
        pCMatrix[i*Size+j] += pAMatrix[i*Size+k]*pBMatrix[k*Size+j];
}

// Function for block multiplication: the blocks are multiplied
// as ordinary BlockSize x BlockSize matrices
void BlockMultiplication (double* pAblock, double* pBblock,
    double* pCblock, int Size) {
  SerialResultCalculation(pAblock, pBblock, pCblock, Size);
}

// Creation of the two-dimensional grid communicator
// and communicators for each row and each column of the grid
void CreateGridCommunicators () {
  int DimSize[2];   // Number of processes in each dimension of the grid
  int Periodic[2];  // =1, if the grid dimension should be periodic
  int Subdims[2];   // =1, if the grid dimension should be kept in the subgrid
  DimSize[0] = GridSize;
  DimSize[1] = GridSize;
  Periodic[0] = 0;
  Periodic[1] = 0;
  // Creation of the Cartesian communicator; rank reordering is disabled
  // so that ranks in GridComm coincide with ranks in MPI_COMM_WORLD
  MPI_Cart_create(MPI_COMM_WORLD, 2, DimSize, Periodic, 0, &GridComm);
  // Determination of the Cartesian coordinates of every process
  MPI_Cart_coords(GridComm, ProcRank, 2, GridCoords);
  // Creating communicators for rows (dimension 0 fixed, dimension 1 kept)
  Subdims[0] = 0;
  Subdims[1] = 1;
  MPI_Cart_sub(GridComm, Subdims, &RowComm);
  // Creating communicators for columns (dimension 0 kept, dimension 1 fixed)
  Subdims[0] = 1;
  Subdims[1] = 0;
  MPI_Cart_sub(GridComm, Subdims, &ColComm);
}

// Function for memory allocation and data initialization
void ProcessInitialization (double* &pAMatrix, double* &pBMatrix,
    double* &pCMatrix, double* &pAblock, double* &pBblock, double* &pCblock,
    double* &pTemporaryAblock, int &Size, int &BlockSize) {
  if (ProcRank == 0) {
    do {
      printf("\nEnter size of the initial objects: ");
      scanf("%d", &Size);
      if (Size % GridSize != 0) {
        printf("Size of matrices must be divisible by the grid size!\n");
      }
    } while (Size % GridSize != 0);
  }
  MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
  BlockSize = Size/GridSize;
  pAblock = new double [BlockSize*BlockSize];
  pBblock = new double [BlockSize*BlockSize];
  pCblock = new double [BlockSize*BlockSize];
  pTemporaryAblock = new double [BlockSize*BlockSize];
  for (int i=0; i<BlockSize*BlockSize; i++) {
    pCblock[i] = 0;
  }
  if (ProcRank == 0) {
    pAMatrix = new double [Size*Size];
    pBMatrix = new double [Size*Size];
    pCMatrix = new double [Size*Size];
    DummyDataInitialization(pAMatrix, pBMatrix, Size);
    //RandomDataInitialization(pAMatrix, pBMatrix, Size);
  }
}

// Function for checkerboard matrix decomposition: the matrix is scattered
// from the process with grid coordinates (0,0) in two steps - first by
// horizontal stripes down the first grid column, then by blocks along rows
void CheckerboardMatrixScatter (double* pMatrix, double* pMatrixBlock,
    int Size, int BlockSize) {
  double* MatrixRow = new double [BlockSize*Size];
  if (GridCoords[1] == 0) {
    MPI_Scatter(pMatrix, BlockSize*Size, MPI_DOUBLE, MatrixRow,
      BlockSize*Size, MPI_DOUBLE, 0, ColComm);
  }
  for (int i=0; i<BlockSize; i++) {
    MPI_Scatter(&MatrixRow[i*Size], BlockSize, MPI_DOUBLE,
      &(pMatrixBlock[i*BlockSize]), BlockSize, MPI_DOUBLE, 0, RowComm);
  }
  delete [] MatrixRow;
}

// Data distribution among the processes
void DataDistribution (double* pAMatrix, double* pBMatrix,
    double* pMatrixAblock, double* pBblock, int Size, int BlockSize) {
  CheckerboardMatrixScatter(pAMatrix, pMatrixAblock, Size, BlockSize);
  CheckerboardMatrixScatter(pBMatrix, pBblock, Size, BlockSize);
}

// Function for gathering the result matrix (the reverse of the
// checkerboard decomposition)
void ResultCollection (double* pCMatrix, double* pCblock, int Size,
    int BlockSize) {
  double* pResultRow = new double [Size*BlockSize];
  for (int i=0; i<BlockSize; i++) {
    MPI_Gather(&pCblock[i*BlockSize], BlockSize, MPI_DOUBLE,
      &pResultRow[i*Size], BlockSize, MPI_DOUBLE, 0, RowComm);
  }
  if (GridCoords[1] == 0) {
    MPI_Gather(pResultRow, BlockSize*Size, MPI_DOUBLE, pCMatrix,
      BlockSize*Size, MPI_DOUBLE, 0, ColComm);
  }
  delete [] pResultRow;
}

// Broadcasting matrix A blocks within the process grid rows
void ABlockCommunication (int iter, double *pAblock, double* pMatrixAblock,
    int BlockSize) {
  // Defining the leading process (pivot) of the process grid row
  int Pivot = (GridCoords[0] + iter) % GridSize;
  // Copying the transmitted block into a separate memory buffer
  if (GridCoords[1] == Pivot) {
    for (int i=0; i<BlockSize*BlockSize; i++)
      pAblock[i] = pMatrixAblock[i];
  }
  // Block broadcasting
  MPI_Bcast(pAblock, BlockSize*BlockSize, MPI_DOUBLE, Pivot, RowComm);
}

// Cyclic shift of matrix B blocks along the process grid columns:
// each process sends its block one position up the column (to PrevProc,
// with wraparound) and receives a new block from below (from NextProc),
// so after iteration iter process (i,j) holds B((i+iter+1) mod GridSize, j)
void BblockCommunication (double *pBblock, int BlockSize) {
  MPI_Status Status;
  int NextProc = GridCoords[0] + 1;
  if (GridCoords[0] == GridSize-1) NextProc = 0;
  int PrevProc = GridCoords[0] - 1;
  if (GridCoords[0] == 0) PrevProc = GridSize-1;
  MPI_Sendrecv_replace(pBblock, BlockSize*BlockSize, MPI_DOUBLE,
    PrevProc, 0, NextProc, 0, ColComm, &Status);
}

// Parallel Fox method: GridSize iterations of broadcast, multiply, shift
void ParallelResultCalculation (double* pAblock, double* pMatrixAblock,
    double* pBblock, double* pCblock, int BlockSize) {
  for (int iter = 0; iter < GridSize; iter++) {
    // Sending blocks of matrix A to the process grid rows
    ABlockCommunication(iter, pAblock, pMatrixAblock, BlockSize);
    // Block multiplication
    BlockMultiplication(pAblock, pBblock, pCblock, BlockSize);
    // Cyclic shift of blocks of matrix B in process grid columns
    BblockCommunication(pBblock, BlockSize);
  }
}

// Test printing of the matrix blocks of all processes
void TestBlocks (double* pBlock, int BlockSize, const char* str) {
  MPI_Barrier(MPI_COMM_WORLD);
  if (ProcRank == 0) {
    printf("%s \n", str);
  }
  for (int i=0; i<ProcNum; i++) {
    if (ProcRank == i) {
      printf("ProcRank = %d \n", ProcRank);
      PrintMatrix(pBlock, BlockSize, BlockSize);
    }
    MPI_Barrier(MPI_COMM_WORLD);
  }
}

// Function for testing the parallel result against a serial computation
void TestResult (double* pAMatrix, double* pBMatrix, double* pCMatrix,
    int Size) {
  double* pSerialResult;    // Result matrix of serial multiplication
  double Accuracy = 1.e-6;  // Comparison accuracy
  int equal = 0;            // =1, if the matrices are not equal
  int i;                    // Loop variable
  if (ProcRank == 0) {
    pSerialResult = new double [Size*Size];
    for (i=0; i<Size*Size; i++) {
      pSerialResult[i] = 0;
    }
    SerialResultCalculation(pAMatrix, pBMatrix, pSerialResult, Size);
    for (i=0; i<Size*Size; i++) {
      if (fabs(pSerialResult[i]-pCMatrix[i]) >= Accuracy)
        equal = 1;
    }
    if (equal == 1)
      printf("The results of serial and parallel algorithms are NOT "
        "identical. Check your code.\n");
    else
      printf("The results of serial and parallel algorithms are "
        "identical.\n");
    delete [] pSerialResult;
  }
}

// Function for computational process termination (memory deallocation)
void ProcessTermination (double* pAMatrix, double* pBMatrix,
    double* pCMatrix, double* pAblock, double* pBblock, double* pCblock,
    double* pMatrixAblock) {
  if (ProcRank == 0) {
    delete [] pAMatrix;
    delete [] pBMatrix;
    delete [] pCMatrix;
  }
  delete [] pAblock;
  delete [] pBblock;
  delete [] pCblock;
  delete [] pMatrixAblock;
}

int main (int argc, char* argv[]) {
  double* pAMatrix;  // The first argument of matrix multiplication
  double* pBMatrix;  // The second argument of matrix multiplication
  double* pCMatrix;  // The result matrix
  int Size;          // Size of matrices
  int BlockSize;     // Size of matrix blocks on current process
  double *pAblock;   // Current block of matrix A on current process
  double *pBblock;   // Current block of matrix B on current process
  double *pCblock;   // Block of result matrix C on current process
  double *pMatrixAblock;  // Block of matrix A initially assigned to the process
  double Start, Finish, Duration;  // Variables for time measurement (unused here)
  setvbuf(stdout, 0, _IONBF, 0);
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
  MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
  GridSize = (int)sqrt((double)ProcNum);
  if (ProcNum != GridSize*GridSize) {
    if (ProcRank == 0) {
      printf("Number of processes must be a perfect square \n");
    }
  }
  else {
    if (ProcRank == 0)
      printf("Parallel matrix multiplication program\n");
    // Creating the Cartesian grid, row and column communicators
    CreateGridCommunicators();
    // Memory allocation and initialization of matrix elements
    ProcessInitialization(pAMatrix, pBMatrix, pCMatrix, pAblock, pBblock,
      pCblock, pMatrixAblock, Size, BlockSize);
    // Blockwise distribution of matrices A and B among the processes
    DataDistribution(pAMatrix, pBMatrix, pMatrixAblock, pBblock, Size,
      BlockSize);
    // Execution of the Fox method
    ParallelResultCalculation(pAblock, pMatrixAblock, pBblock,
      pCblock, BlockSize);
    // Gathering the result matrix on the process with rank 0
    ResultCollection(pCMatrix, pCblock, Size, BlockSize);
    TestResult(pAMatrix, pBMatrix, pCMatrix, Size);
    // Process termination (memory deallocation)
    ProcessTermination(pAMatrix, pBMatrix, pCMatrix, pAblock, pBblock,
      pCblock, pMatrixAblock);
  }
  MPI_Finalize();
  return 0;
}
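To experiment with the listing, it can be compiled and launched with the wrapper scripts of an MPI implementation. A minimal sketch, assuming MPICH- or Open MPI-style wrappers are installed and the listing is saved as matrix_fox.cpp (the file name is arbitrary):

  mpic++ matrix_fox.cpp -o matrix_fox
  mpiexec -n 4 ./matrix_fox

The number of processes must be a perfect square (4, 9, 16, ...), and the matrix size entered at the prompt must be divisible by the grid size GridSize = sqrt(ProcNum).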

 

Several functional modules can be distinguished in the program:

- initialization and termination (ProcessInitialization, ProcessTermination): memory allocation, input of the matrix size, initialization of the matrix elements, and deallocation of the memory at the end;
- creation of the communicators (CreateGridCommunicators): the two-dimensional Cartesian grid and the row and column subcommunicators;
- data distribution and result collection (DataDistribution, CheckerboardMatrixScatter, ResultCollection);
- the parallel computation proper (ParallelResultCalculation together with ABlockCommunication, BblockCommunication, and BlockMultiplication);
- testing of the result (TestResult, and TestBlocks for debug printing of the blocks).
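As a worked example of the checkerboard decomposition (an illustration added here, not part of the original listing): for Size = 4 and four processes, GridSize = 2 and BlockSize = 2, so the matrix is split as

\[
A = \begin{pmatrix} A_{00} & A_{01} \\ A_{10} & A_{11} \end{pmatrix},
\]

where each A_{ij} is a 2 x 2 block stored by the process with grid coordinates (i, j); matrices B and C are partitioned in the same way, and DataDistribution delivers the blocks in two scatter steps: first by horizontal stripes down the first grid column, then by blocks along the grid rows.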




