56 long long int innerMask = 1LL << targetQubit;
59 long long int thisTask;
60 long long int thisPattern;
61 long long int totMask = innerMask|outerMask;
64 # pragma omp parallel \
66 shared (innerMask,outerMask,totMask,qureg,retain,numTasks, targetQubit) \
67 private (thisTask,thisPattern)
71 # pragma omp for schedule (static)
73 for (thisTask=0; thisTask<numTasks; thisTask++){
75 if ((thisPattern==innerMask) || (thisPattern==outerMask)){
86 qreal retain=1-dephase;
91 qreal retain=1-dephase;
94 long long int innerMaskQubit1 = 1LL << qubit1;
96 long long int innerMaskQubit2 = 1LL << qubit2;
98 long long int totMaskQubit1 = innerMaskQubit1|outerMaskQubit1;
99 long long int totMaskQubit2 = innerMaskQubit2|outerMaskQubit2;
101 long long int thisTask;
102 long long int thisPatternQubit1, thisPatternQubit2;
105 # pragma omp parallel \
107 shared (innerMaskQubit1,outerMaskQubit1,totMaskQubit1,innerMaskQubit2,outerMaskQubit2, \
108 totMaskQubit2,qureg,retain,numTasks) \
109 private (thisTask,thisPatternQubit1,thisPatternQubit2)
113 # pragma omp for schedule (static)
115 for (thisTask=0; thisTask<numTasks; thisTask++){
120 if ( (thisPatternQubit1==innerMaskQubit1) || (thisPatternQubit1==outerMaskQubit1) ||
121 (thisPatternQubit2==innerMaskQubit2) || (thisPatternQubit2==outerMaskQubit2) ){
132 qreal retain=1-depolLevel;
135 long long int innerMask = 1LL << targetQubit;
137 long long int totMask = innerMask|outerMask;
139 long long int thisTask;
140 long long int partner;
141 long long int thisPattern;
143 qreal realAv, imagAv;
146 # pragma omp parallel \
148 shared (innerMask,outerMask,totMask,qureg,retain,depolLevel,numTasks) \
149 private (thisTask,partner,thisPattern,realAv,imagAv)
153 # pragma omp for schedule (static)
155 for (thisTask=0; thisTask<numTasks; thisTask++){
157 if ((thisPattern==innerMask) || (thisPattern==outerMask)){
163 if ((thisTask&totMask)==0){
165 partner = thisTask | totMask;
166 realAv = (qureg.
stateVec.real[thisTask] + qureg.
stateVec.real[partner]) /2 ;
167 imagAv = (qureg.
stateVec.imag[thisTask] + qureg.
stateVec.imag[partner]) /2 ;
169 qureg.
stateVec.real[thisTask] = retain*qureg.
stateVec.real[thisTask] + depolLevel*realAv;
170 qureg.
stateVec.imag[thisTask] = retain*qureg.
stateVec.imag[thisTask] + depolLevel*imagAv;
172 qureg.
stateVec.real[partner] = retain*qureg.
stateVec.real[partner] + depolLevel*realAv;
173 qureg.
stateVec.imag[partner] = retain*qureg.
stateVec.imag[partner] + depolLevel*imagAv;
181 qreal retain=1-damping;
182 qreal dephase=sqrt(retain);
185 long long int innerMask = 1LL << targetQubit;
187 long long int totMask = innerMask|outerMask;
189 long long int thisTask;
190 long long int partner;
191 long long int thisPattern;
196 # pragma omp parallel \
198 shared (innerMask,outerMask,totMask,qureg,retain,damping,dephase,numTasks) \
199 private (thisTask,partner,thisPattern)
203 # pragma omp for schedule (static)
205 for (thisTask=0; thisTask<numTasks; thisTask++){
207 if ((thisPattern==innerMask) || (thisPattern==outerMask)){
213 if ((thisTask&totMask)==0){
215 partner = thisTask | totMask;
238 long long int sizeInnerBlock, sizeInnerHalfBlock;
239 long long int sizeOuterColumn, sizeOuterHalfColumn;
240 long long int thisInnerBlock,
243 thisIndexInOuterColumn,
244 thisIndexInInnerBlock;
247 long long int thisTask;
251 sizeInnerHalfBlock = 1LL << targetQubit;
252 sizeInnerBlock = 2LL * sizeInnerHalfBlock;
254 sizeOuterHalfColumn = sizeOuterColumn >> 1;
257 # pragma omp parallel \
259 shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
260 qureg,depolLevel,numTasks,targetQubit) \
261 private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
262 thisIndexInInnerBlock,outerBit)
266 # pragma omp for schedule (static)
273 for (thisTask=0; thisTask<numTasks; thisTask++) {
276 thisOuterColumn = thisTask / sizeOuterHalfColumn;
277 thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1);
278 thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
280 thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1);
281 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
282 + thisIndexInInnerBlock;
287 thisIndex += outerBit*(sizeInnerHalfBlock);
297 qureg.
stateVec.real[thisIndex] = (1-depolLevel)*qureg.
stateVec.real[thisIndex] +
300 qureg.
stateVec.imag[thisIndex] = (1-depolLevel)*qureg.
stateVec.imag[thisIndex] +
307 qreal retain=1-damping;
308 qreal dephase=sqrt(1-damping);
315 long long int sizeInnerBlock, sizeInnerHalfBlock;
316 long long int sizeOuterColumn, sizeOuterHalfColumn;
317 long long int thisInnerBlock,
320 thisIndexInOuterColumn,
321 thisIndexInInnerBlock;
325 long long int thisTask;
329 sizeInnerHalfBlock = 1LL << targetQubit;
330 sizeInnerBlock = 2LL * sizeInnerHalfBlock;
332 sizeOuterHalfColumn = sizeOuterColumn >> 1;
335 # pragma omp parallel \
337 shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
338 qureg,damping, retain, dephase, numTasks,targetQubit) \
339 private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
340 thisIndexInInnerBlock,outerBit, stateBit)
344 # pragma omp for schedule (static)
351 for (thisTask=0; thisTask<numTasks; thisTask++) {
354 thisOuterColumn = thisTask / sizeOuterHalfColumn;
355 thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1);
356 thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
358 thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1);
359 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
360 + thisIndexInInnerBlock;
365 thisIndex += outerBit*(sizeInnerHalfBlock);
395 long long int innerMaskQubit1 = 1LL << qubit1;
397 long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
398 long long int innerMaskQubit2 = 1LL << qubit2;
400 long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
402 long long int thisTask;
403 long long int partner;
404 long long int thisPatternQubit1, thisPatternQubit2;
406 qreal real00, imag00;
409 # pragma omp parallel \
411 shared (totMaskQubit1,totMaskQubit2,qureg,delta,gamma,numTasks) \
412 private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
416 # pragma omp for schedule (static)
419 for (thisTask=0; thisTask<numTasks; thisTask++){
422 if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
423 || (thisPatternQubit2==totMaskQubit2))){
425 partner = thisTask | totMaskQubit1;
426 real00 = qureg.
stateVec.real[thisTask];
427 imag00 = qureg.
stateVec.imag[thisTask];
430 + delta*qureg.
stateVec.real[partner];
432 + delta*qureg.
stateVec.imag[partner];
440 # pragma omp for schedule (static)
443 for (thisTask=0; thisTask<numTasks; thisTask++){
446 if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
447 || (thisPatternQubit1==totMaskQubit1))){
449 partner = thisTask | totMaskQubit2;
450 real00 = qureg.
stateVec.real[thisTask];
451 imag00 = qureg.
stateVec.imag[thisTask];
454 + delta*qureg.
stateVec.real[partner];
456 + delta*qureg.
stateVec.imag[partner];
465 # pragma omp for schedule (static)
468 for (thisTask=0; thisTask<numTasks; thisTask++){
471 if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
472 || (thisPatternQubit1==totMaskQubit1))){
474 partner = thisTask | totMaskQubit2;
475 partner = partner ^ totMaskQubit1;
476 real00 = qureg.
stateVec.real[thisTask];
477 imag00 = qureg.
stateVec.imag[thisTask];
480 + delta*qureg.
stateVec.real[partner]);
482 + delta*qureg.
stateVec.imag[partner]);
496 long long int innerMaskQubit1 = 1LL << qubit1;
498 long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
499 long long int innerMaskQubit2 = 1LL << qubit2;
501 long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
506 long long int thisTask;
507 long long int partner;
508 long long int thisPatternQubit1, thisPatternQubit2;
510 qreal real00, imag00;
513 # pragma omp parallel \
515 shared (totMaskQubit1,totMaskQubit2,qureg,delta,numTasks) \
516 private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
521 # pragma omp for schedule (static)
524 for (thisTask=0; thisTask<numTasks; thisTask ++){
527 if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
528 || (thisPatternQubit2==totMaskQubit2))){
530 partner = thisTask | totMaskQubit1;
531 real00 = qureg.
stateVec.real[thisTask];
532 imag00 = qureg.
stateVec.imag[thisTask];
535 + delta*qureg.
stateVec.real[partner];
537 + delta*qureg.
stateVec.imag[partner];
550 long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
551 long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
552 long long int sizeOuterColumn, sizeOuterQuarterColumn;
553 long long int thisInnerBlockQ2,
556 thisIndexInOuterColumn,
557 thisIndexInInnerBlockQ1,
558 thisIndexInInnerBlockQ2,
559 thisInnerBlockQ1InInnerBlockQ2;
560 int outerBitQ1, outerBitQ2;
562 long long int thisTask;
566 sizeInnerHalfBlockQ1 = 1LL << targetQubit;
567 sizeInnerHalfBlockQ2 = 1LL << qubit2;
568 sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
569 sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
570 sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
572 sizeOuterQuarterColumn = sizeOuterColumn >> 2;
575 # pragma omp parallel \
577 shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
578 sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma,numTasks,targetQubit,qubit2) \
579 private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
580 thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
581 thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
585 # pragma omp for schedule (static)
592 for (thisTask=0; thisTask<numTasks; thisTask++) {
595 thisOuterColumn = thisTask / sizeOuterQuarterColumn;
597 thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
598 thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
600 thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
601 thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
603 thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
606 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
607 + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
613 thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
619 thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
641 long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
642 long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
643 long long int sizeOuterColumn, sizeOuterQuarterColumn;
644 long long int thisInnerBlockQ2,
647 thisIndexInPairVector,
648 thisIndexInOuterColumn,
649 thisIndexInInnerBlockQ1,
650 thisIndexInInnerBlockQ2,
651 thisInnerBlockQ1InInnerBlockQ2;
652 int outerBitQ1, outerBitQ2;
654 long long int thisTask;
658 sizeInnerHalfBlockQ1 = 1LL << targetQubit;
659 sizeInnerHalfBlockQ2 = 1LL << qubit2;
660 sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
661 sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
662 sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
664 sizeOuterQuarterColumn = sizeOuterColumn >> 2;
668 # pragma omp parallel \
670 shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
671 sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma, numTasks,targetQubit,qubit2) \
672 private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
673 thisOuterColumn,thisIndex,thisIndexInPairVector,thisIndexInOuterColumn, \
674 thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
678 # pragma omp for schedule (static)
686 for (thisTask=0; thisTask<numTasks; thisTask++) {
689 thisOuterColumn = thisTask / sizeOuterQuarterColumn;
691 thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
692 thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
694 thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
695 thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
697 thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
700 thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
701 + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
707 thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
711 thisIndexInPairVector = thisTask + (1-outerBitQ1)*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn -
712 outerBitQ1*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn;
718 thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
743 # pragma omp parallel for schedule (static)
745 for (i=startInd; i < startInd+numAmps; i++) {
753 # pragma omp parallel for schedule (static)
755 for (i=startInd; i < startInd+numAmps; i++) {
762 long long int startAmpInd,
long long int numAmps,
long long int blockSize
764 long long int numDubBlocks = numAmps / (2*blockSize);
765 long long int blockStartInd;
768 long long int dubBlockInd;
770 # pragma omp parallel for schedule (static) private (blockStartInd)
772 for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
773 blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
775 zeroSomeAmps( qureg, blockStartInd + blockSize, blockSize);
778 long long int dubBlockInd;
780 # pragma omp parallel for schedule (static) private (blockStartInd)
782 for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
783 blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
798 long long int innerBlockSize = (1LL << measureQubit);
804 long long int globalStartInd = qureg.
chunkId * locNumAmps;
805 int innerBit =
extractBit(measureQubit, globalStartInd);
809 if (locNumAmps <= outerBlockSize) {
812 if (outerBit != outcome) {
818 if (locNumAmps <= innerBlockSize) {
821 if (innerBit != outcome)
832 qureg, totalStateProb, innerBit==outcome, 0, qureg.
numAmpsPerChunk, innerBlockSize);
837 long long int numOuterDoubleBlocks = locNumAmps / (2*outerBlockSize);
838 long long int firstBlockInd;
843 if (outerBit == outcome) {
845 for (
long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
846 firstBlockInd = outerDubBlockInd*2*outerBlockSize;
850 qureg, totalStateProb, innerBit==outcome,
851 firstBlockInd, outerBlockSize, innerBlockSize);
854 zeroSomeAmps(qureg, firstBlockInd + outerBlockSize, outerBlockSize);
859 for (
long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
860 firstBlockInd = outerDubBlockInd*2*outerBlockSize;
865 qureg, totalStateProb, innerBit==outcome,
866 firstBlockInd + outerBlockSize, outerBlockSize, innerBlockSize);
883 # pragma omp parallel \
884 shared (vecRe, vecIm, numAmps) \
886 reduction ( +:trace )
890 # pragma omp for schedule (static)
892 for (index=0LL; index<numAmps; index++) {
894 trace += vecRe[index]*vecRe[index] + vecIm[index]*vecIm[index];
914 # pragma omp parallel \
916 shared (combineVecRe,combineVecIm,otherVecRe,otherVecIm, otherProb, numAmps) \
921 # pragma omp for schedule (static)
923 for (index=0; index < numAmps; index++) {
924 combineVecRe[index] *= 1-otherProb;
925 combineVecIm[index] *= 1-otherProb;
927 combineVecRe[index] += otherProb * otherVecRe[index];
928 combineVecIm[index] += otherProb * otherVecIm[index];
948 # pragma omp parallel \
949 shared (aRe,aIm, bRe,bIm, numAmps) \
950 private (index,difRe,difIm) \
951 reduction ( +:trace )
955 # pragma omp for schedule (static)
957 for (index=0LL; index<numAmps; index++) {
959 difRe = aRe[index] - bRe[index];
960 difIm = aIm[index] - bIm[index];
961 trace += difRe*difRe + difIm*difIm;
982 # pragma omp parallel \
983 shared (aRe,aIm, bRe,bIm, numAmps) \
985 reduction ( +:trace )
989 # pragma omp for schedule (static)
991 for (index=0LL; index<numAmps; index++) {
992 trace += aRe[index]*bRe[index] + aIm[index]*bIm[index];
1032 qreal densElemRe, densElemIm;
1033 qreal prefacRe, prefacIm;
1034 qreal rowSumRe, rowSumIm;
1035 qreal vecElemRe, vecElemIm;
1038 qreal globalSumRe = 0;
1041 # pragma omp parallel \
1042 shared (vecRe,vecIm,densRe,densIm, dim,colsPerNode,startCol) \
1043 private (row,col, prefacRe,prefacIm, rowSumRe,rowSumIm, densElemRe,densElemIm, vecElemRe,vecElemIm) \
1044 reduction ( +:globalSumRe )
1048 # pragma omp for schedule (static)
1051 for (row=0; row < dim; row++) {
1054 prefacRe = vecRe[row];
1055 prefacIm = - vecIm[row];
1061 for (col=0; col < colsPerNode; col++) {
1064 densElemRe = densRe[row + dim*col];
1065 densElemIm = densIm[row + dim*col];
1068 vecElemRe = vecRe[startCol + col];
1069 vecElemIm = vecIm[startCol + col];
1071 rowSumRe += densElemRe*vecElemRe - densElemIm*vecElemIm;
1072 rowSumIm += densElemRe*vecElemIm + densElemIm*vecElemRe;
1075 globalSumRe += rowSumRe*prefacRe - rowSumIm*prefacIm;
1084 qreal innerProdReal = 0;
1085 qreal innerProdImag = 0;
1087 long long int index;
1094 qreal braRe, braIm, ketRe, ketIm;
1097 # pragma omp parallel \
1098 shared (braVecReal, braVecImag, ketVecReal, ketVecImag, numAmps) \
1099 private (index, braRe, braIm, ketRe, ketIm) \
1100 reduction ( +:innerProdReal, innerProdImag )
1104 # pragma omp for schedule (static)
1106 for (index=0; index < numAmps; index++) {
1107 braRe = braVecReal[index];
1108 braIm = braVecImag[index];
1109 ketRe = ketVecReal[index];
1110 ketIm = ketVecImag[index];
1113 innerProdReal += braRe*ketRe + braIm*ketIm;
1114 innerProdImag += braRe*ketIm - braIm*ketRe;
1119 innerProd.
real = innerProdReal;
1120 innerProd.
imag = innerProdImag;
1136 long long int index;
1138 # pragma omp parallel \
1140 shared (densityNumElems, densityReal, densityImag) \
1145 # pragma omp for schedule (static)
1147 for (index=0; index<densityNumElems; index++) {
1148 densityReal[index] = 0.0;
1149 densityImag[index] = 0.0;
1155 long long int densityInd = (densityDim + 1)*stateInd;
1158 if (qureg.
chunkId == densityInd / densityNumElems){
1159 densityReal[densityInd % densityNumElems] = 1.0;
1160 densityImag[densityInd % densityNumElems] = 0.0;
1175 long long int index;
1179 # pragma omp parallel \
1181 shared (chunkSize, densityReal, densityImag, probFactor) \
1186 # pragma omp for schedule (static)
1188 for (index=0; index<chunkSize; index++) {
1189 densityReal[index] = probFactor;
1190 densityImag[index] = 0.0;
1212 long long int col, row, index;
1215 qreal ketRe, ketIm, braRe, braIm;
1218 # pragma omp parallel \
1220 shared (colOffset, colsPerNode,rowsPerNode, vecRe,vecIm,densRe,densIm) \
1221 private (col,row, ketRe,ketIm,braRe,braIm, index)
1225 # pragma omp for schedule (static)
1228 for (col=0; col < colsPerNode; col++) {
1231 for (row=0; row < rowsPerNode; row++) {
1236 braRe = vecRe[col + colOffset];
1237 braIm = - vecIm[col + colOffset];
1240 index = row + col*rowsPerNode;
1241 densRe[index] = ketRe*braRe - ketIm*braIm;
1242 densIm[index] = ketRe*braIm + ketIm*braRe;
1255 long long int localEndInd = localStartInd + numAmps;
1261 if (localStartInd < 0)
1268 long long int index;
1273 # pragma omp parallel \
1275 shared (localStartInd,localEndInd, vecRe,vecIm, reals,imags, offset) \
1280 # pragma omp for schedule (static)
1283 for (index=localStartInd; index < localEndInd; index++) {
1284 vecRe[index] = reals[index + offset];
1285 vecIm[index] = imags[index + offset];
1292 long long int numAmps = 1LL << numQubits;
1293 long long int numAmpsPerRank = numAmps/env.
numRanks;
1295 if (numAmpsPerRank > SIZE_MAX) {
1296 printf(
"Could not allocate memory (cannot fit numAmps into size_t)!");
1297 exit (EXIT_FAILURE);
1300 size_t arrSize = (size_t) (numAmpsPerRank *
sizeof(*(qureg->
stateVec.real)));
1301 qureg->
stateVec.real = malloc(arrSize);
1302 qureg->
stateVec.imag = malloc(arrSize);
1309 && numAmpsPerRank ) {
1310 printf(
"Could not allocate memory!");
1311 exit (EXIT_FAILURE);
1315 && numAmpsPerRank ) {
1316 printf(
"Could not allocate memory!");
1317 exit (EXIT_FAILURE);
1361 printf(
"Could not allocate memory!\n");
1396 long long int i, globalInd;
1398 int t, q, isOddNumOnes, sign;
1401 # pragma omp parallel \
1403 shared (offset,numElems, opRe,opIm, numTerms,numQubits,coeffs,codes) \
1404 private (i,globalInd, elem, isOddNumOnes,t,q,sign)
1408 # pragma omp for schedule (static)
1410 for (i=0; i<numElems; i++) {
1412 globalInd = i + offset;
1416 for (t=0; t<numTerms; t++) {
1420 for (q=0; q<numQubits; q++)
1421 if (codes[q + t*numQubits] ==
PAULI_Z)
1423 isOddNumOnes = !isOddNumOnes;
1426 sign = 1 - 2*isOddNumOnes;
1427 elem += coeffs[t] * sign;
1440 long long int index;
1443 for (rank=0; rank<qureg.
numChunks; rank++){
1446 printf(
"Reporting state from rank %d [\n", qureg.
chunkId);
1447 printf(
"real, imag\n");
1448 }
else if (rank==0) {
1449 printf(
"Reporting state [\n");
1450 printf(
"real, imag\n");
1455 printf(REAL_STRING_FORMAT
", " REAL_STRING_FORMAT
"\n", qureg.
stateVec.real[index], qureg.
stateVec.imag[index]);
1457 if (reportRank || rank==qureg.
numChunks-1) printf(
"]\n");
1461 }
else printf(
"Error: reportStateToScreen will not print output for systems of more than 5 qubits.\n");
1466 long long int stateVecSize;
1467 long long int index;
1478 # pragma omp parallel \
1480 shared (stateVecSize, stateVecReal, stateVecImag) \
1485 # pragma omp for schedule (static)
1487 for (index=0; index<stateVecSize; index++) {
1488 stateVecReal[index] = 0.0;
1489 stateVecImag[index] = 0.0;
1506 long long int chunkSize, stateVecSize;
1507 long long int index;
1511 stateVecSize = chunkSize*qureg.
numChunks;
1512 qreal normFactor = 1.0/sqrt((
qreal)stateVecSize);
1520 # pragma omp parallel \
1522 shared (chunkSize, stateVecReal, stateVecImag, normFactor) \
1527 # pragma omp for schedule (static)
1529 for (index=0; index<chunkSize; index++) {
1530 stateVecReal[index] = normFactor;
1531 stateVecImag[index] = 0.0;
1538 long long int stateVecSize;
1539 long long int index;
1550 # pragma omp parallel \
1552 shared (stateVecSize, stateVecReal, stateVecImag) \
1557 # pragma omp for schedule (static)
1559 for (index=0; index<stateVecSize; index++) {
1560 stateVecReal[index] = 0.0;
1561 stateVecImag[index] = 0.0;
1566 if (qureg.
chunkId == stateInd/stateVecSize){
1567 stateVecReal[stateInd % stateVecSize] = 1.0;
1568 stateVecImag[stateInd % stateVecSize] = 0.0;
1575 long long int stateVecSize;
1576 long long int index;
1589 # pragma omp parallel \
1591 shared (stateVecSize, targetStateVecReal, targetStateVecImag, copyStateVecReal, copyStateVecImag) \
1596 # pragma omp for schedule (static)
1598 for (index=0; index<stateVecSize; index++) {
1599 targetStateVecReal[index] = copyStateVecReal[index];
1600 targetStateVecImag[index] = copyStateVecImag[index];
1613 long long int chunkSize, stateVecSize;
1614 long long int index;
1616 long long int chunkId=qureg->
chunkId;
1620 stateVecSize = chunkSize*qureg->
numChunks;
1621 qreal normFactor = 1.0/sqrt((
qreal)stateVecSize/2.0);
1629 # pragma omp parallel \
1631 shared (chunkSize, stateVecReal, stateVecImag, normFactor, qubitId, outcome, chunkId) \
1632 private (index, bit)
1636 # pragma omp for schedule (static)
1638 for (index=0; index<chunkSize; index++) {
1639 bit =
extractBit(qubitId, index+chunkId*chunkSize);
1641 stateVecReal[index] = normFactor;
1642 stateVecImag[index] = 0.0;
1644 stateVecReal[index] = 0.0;
1645 stateVecImag[index] = 0.0;
1659 long long int chunkSize;
1660 long long int index;
1661 long long int indexOffset;
1670 indexOffset = chunkSize * qureg.
chunkId;
1674 # pragma omp parallel \
1676 shared (chunkSize, stateVecReal, stateVecImag, indexOffset) \
1681 # pragma omp for schedule (static)
1683 for (index=0; index<chunkSize; index++) {
1684 stateVecReal[index] = ((indexOffset + index)*2.0)/10.0;
1685 stateVecImag[index] = ((indexOffset + index)*2.0+1.0)/10.0;
1692 long long int chunkSize, stateVecSize;
1693 long long int indexInChunk, totalIndex;
1696 stateVecSize = chunkSize*qureg->
numChunks;
1704 for (
int rank=0; rank<(qureg->
numChunks); rank++){
1706 fp = fopen(filename,
"r");
1712 indexInChunk = 0; totalIndex = 0;
1713 while (fgets(line,
sizeof(
char)*200, fp) != NULL && totalIndex<stateVecSize){
1715 int chunkId = (int) (totalIndex/chunkSize);
1718 sscanf(line,
"%f, %f", &(stateVecReal[indexInChunk]),
1719 &(stateVecImag[indexInChunk]));
1720 # elif QuEST_PREC==2
1721 sscanf(line,
"%lf, %lf", &(stateVecReal[indexInChunk]),
1722 &(stateVecImag[indexInChunk]));
1723 # elif QuEST_PREC==4
1724 sscanf(line,
"%Lf, %Lf", &(stateVecReal[indexInChunk]),
1725 &(stateVecImag[indexInChunk]));
1745 for (
long long int i=0; i<chunkSize; i++){
1747 if (diff>precision)
return 0;
1749 if (diff>precision)
return 0;
1756 long long int sizeBlock, sizeHalfBlock;
1757 long long int thisBlock,
1760 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
1761 long long int thisTask;
1765 sizeHalfBlock = 1LL << targetQubit;
1766 sizeBlock = 2LL * sizeHalfBlock;
1775 # pragma omp parallel \
1777 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, numTasks) \
1778 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
1782 # pragma omp for schedule (static)
1784 for (thisTask=0; thisTask<numTasks; thisTask++) {
1786 thisBlock = thisTask / sizeHalfBlock;
1787 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
1788 indexLo = indexUp + sizeHalfBlock;
1791 stateRealUp = stateVecReal[indexUp];
1792 stateImagUp = stateVecImag[indexUp];
1794 stateRealLo = stateVecReal[indexLo];
1795 stateImagLo = stateVecImag[indexLo];
1798 stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
1799 - betaReal*stateRealLo - betaImag*stateImagLo;
1800 stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
1801 - betaReal*stateImagLo + betaImag*stateRealLo;
1804 stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
1805 + alphaReal*stateRealLo + alphaImag*stateImagLo;
1806 stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
1807 + alphaReal*stateImagLo - alphaImag*stateRealLo;
1823 long long int thisTask;
1824 long long int thisGlobalInd00;
1825 long long int ind00, ind01, ind10, ind11;
1826 qreal re00, re01, re10, re11;
1827 qreal im00, im01, im10, im11;
1830 # pragma omp parallel \
1832 shared (reVec,imVec,globalIndStart,numTasks,ctrlMask,u,q2,q1) \
1833 private (thisTask, thisGlobalInd00, ind00,ind01,ind10,ind11, re00,re01,re10,re11, im00,im01,im10,im11)
1837 # pragma omp for schedule (static)
1839 for (thisTask=0; thisTask<numTasks; thisTask++) {
1845 thisGlobalInd00 = ind00 + globalIndStart;
1846 if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1855 re00 = reVec[ind00]; im00 = imVec[ind00];
1856 re01 = reVec[ind01]; im01 = imVec[ind01];
1857 re10 = reVec[ind10]; im10 = imVec[ind10];
1858 re11 = reVec[ind11]; im11 = imVec[ind11];
1862 u.
real[0][0]*re00 - u.
imag[0][0]*im00 +
1863 u.
real[0][1]*re01 - u.
imag[0][1]*im01 +
1864 u.
real[0][2]*re10 - u.
imag[0][2]*im10 +
1865 u.
real[0][3]*re11 - u.
imag[0][3]*im11;
1867 u.
imag[0][0]*re00 + u.
real[0][0]*im00 +
1868 u.
imag[0][1]*re01 + u.
real[0][1]*im01 +
1869 u.
imag[0][2]*re10 + u.
real[0][2]*im10 +
1870 u.
imag[0][3]*re11 + u.
real[0][3]*im11;
1873 u.
real[1][0]*re00 - u.
imag[1][0]*im00 +
1874 u.
real[1][1]*re01 - u.
imag[1][1]*im01 +
1875 u.
real[1][2]*re10 - u.
imag[1][2]*im10 +
1876 u.
real[1][3]*re11 - u.
imag[1][3]*im11;
1878 u.
imag[1][0]*re00 + u.
real[1][0]*im00 +
1879 u.
imag[1][1]*re01 + u.
real[1][1]*im01 +
1880 u.
imag[1][2]*re10 + u.
real[1][2]*im10 +
1881 u.
imag[1][3]*re11 + u.
real[1][3]*im11;
1884 u.
real[2][0]*re00 - u.
imag[2][0]*im00 +
1885 u.
real[2][1]*re01 - u.
imag[2][1]*im01 +
1886 u.
real[2][2]*re10 - u.
imag[2][2]*im10 +
1887 u.
real[2][3]*re11 - u.
imag[2][3]*im11;
1889 u.
imag[2][0]*re00 + u.
real[2][0]*im00 +
1890 u.
imag[2][1]*re01 + u.
real[2][1]*im01 +
1891 u.
imag[2][2]*re10 + u.
real[2][2]*im10 +
1892 u.
imag[2][3]*re11 + u.
real[2][3]*im11;
1895 u.
real[3][0]*re00 - u.
imag[3][0]*im00 +
1896 u.
real[3][1]*re01 - u.
imag[3][1]*im01 +
1897 u.
real[3][2]*re10 - u.
imag[3][2]*im10 +
1898 u.
real[3][3]*re11 - u.
imag[3][3]*im11;
1900 u.
imag[3][0]*re00 + u.
real[3][0]*im00 +
1901 u.
imag[3][1]*re01 + u.
real[3][1]*im01 +
1902 u.
imag[3][2]*re10 + u.
real[3][2]*im10 +
1903 u.
imag[3][3]*re11 + u.
real[3][3]*im11;
1909 return *(
int*)a - *(
int*)b;
1919 long long int numTargAmps = 1 << u.
numQubits;
1924 long long int thisTask;
1925 long long int thisInd00;
1926 long long int thisGlobalInd00;
1929 qreal reElem, imElem;
1936 long long int ampInds[numTargAmps];
1937 qreal reAmps[numTargAmps];
1938 qreal imAmps[numTargAmps];
1940 int sortedTargs[numTargs];
1943 long long int* ampInds;
1946 int* sortedTargs = (
int*) _malloca(numTargs *
sizeof *sortedTargs);
1951 for (
int t=0; t < numTargs; t++)
1952 sortedTargs[t] = targs[t];
1953 qsort(sortedTargs, numTargs,
sizeof(
int),
qsortComp);
1956 # pragma omp parallel \
1958 shared (reVec,imVec, numTasks,numTargAmps,globalIndStart, ctrlMask,targs,sortedTargs,u,numTargs) \
1959 private (thisTask,thisInd00,thisGlobalInd00,ind,i,t,r,c,reElem,imElem, ampInds,reAmps,imAmps)
1965 ampInds = (
long long int*) _malloca(numTargAmps *
sizeof *ampInds);
1966 reAmps = (
qreal*) _malloca(numTargAmps *
sizeof *reAmps);
1967 imAmps = (
qreal*) _malloca(numTargAmps *
sizeof *imAmps);
1970 # pragma omp for schedule (static)
1972 for (thisTask=0; thisTask<numTasks; thisTask++) {
1975 thisInd00 = thisTask;
1976 for (t=0; t < numTargs; t++)
1980 thisGlobalInd00 = thisInd00 + globalIndStart;
1981 if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1985 for (i=0; i < numTargAmps; i++) {
1989 for (t=0; t < numTargs; t++)
1995 reAmps [i] = reVec[ind];
1996 imAmps [i] = imVec[ind];
2000 for (r=0; r < numTargAmps; r++) {
2005 for (c=0; c < numTargAmps; c++) {
2006 reElem = u.
real[r][c];
2007 imElem = u.
imag[r][c];
2008 reVec[ind] += reAmps[c]*reElem - imAmps[c]*imElem;
2009 imVec[ind] += reAmps[c]*imElem + imAmps[c]*reElem;
2022 _freea(sortedTargs);
2028 long long int sizeBlock, sizeHalfBlock;
2029 long long int thisBlock,
2032 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2033 long long int thisTask;
2037 sizeHalfBlock = 1LL << targetQubit;
2038 sizeBlock = 2LL * sizeHalfBlock;
2045 # pragma omp parallel \
2047 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks) \
2048 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2052 # pragma omp for schedule (static)
2054 for (thisTask=0; thisTask<numTasks; thisTask++) {
2056 thisBlock = thisTask / sizeHalfBlock;
2057 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2058 indexLo = indexUp + sizeHalfBlock;
2061 stateRealUp = stateVecReal[indexUp];
2062 stateImagUp = stateVecImag[indexUp];
2064 stateRealLo = stateVecReal[indexLo];
2065 stateImagLo = stateVecImag[indexLo];
2069 stateVecReal[indexUp] = u.
real[0][0]*stateRealUp - u.
imag[0][0]*stateImagUp
2070 + u.
real[0][1]*stateRealLo - u.
imag[0][1]*stateImagLo;
2071 stateVecImag[indexUp] = u.
real[0][0]*stateImagUp + u.
imag[0][0]*stateRealUp
2072 + u.
real[0][1]*stateImagLo + u.
imag[0][1]*stateRealLo;
2075 stateVecReal[indexLo] = u.
real[1][0]*stateRealUp - u.
imag[1][0]*stateImagUp
2076 + u.
real[1][1]*stateRealLo - u.
imag[1][1]*stateImagLo;
2077 stateVecImag[indexLo] = u.
real[1][0]*stateImagUp + u.
imag[1][0]*stateRealUp
2078 + u.
real[1][1]*stateImagLo + u.
imag[1][1]*stateRealLo;
2097 ComplexArray stateVecUp,
2098 ComplexArray stateVecLo,
2099 ComplexArray stateVecOut)
2102 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2103 long long int thisTask;
2108 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2109 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2110 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2113 # pragma omp parallel \
2115 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2116 rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks) \
2117 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2121 # pragma omp for schedule (static)
2123 for (thisTask=0; thisTask<numTasks; thisTask++) {
2125 stateRealUp = stateVecRealUp[thisTask];
2126 stateImagUp = stateVecImagUp[thisTask];
2128 stateRealLo = stateVecRealLo[thisTask];
2129 stateImagLo = stateVecImagLo[thisTask];
2132 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2133 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2153 ComplexArray stateVecUp,
2154 ComplexArray stateVecLo,
2155 ComplexArray stateVecOut)
2158 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2159 long long int thisTask;
2164 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2165 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2166 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2170 # pragma omp parallel \
2172 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2173 rot1Real, rot1Imag, rot2Real, rot2Imag,numTasks) \
2174 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2178 # pragma omp for schedule (static)
2180 for (thisTask=0; thisTask<numTasks; thisTask++) {
2182 stateRealUp = stateVecRealUp[thisTask];
2183 stateImagUp = stateVecImagUp[thisTask];
2185 stateRealLo = stateVecRealLo[thisTask];
2186 stateImagLo = stateVecImagLo[thisTask];
2188 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2189 + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2190 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2191 + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2199 long long int sizeBlock, sizeHalfBlock;
2200 long long int thisBlock,
2203 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2204 long long int thisTask;
2207 long long int chunkId=qureg.
chunkId;
2212 sizeHalfBlock = 1LL << targetQubit;
2213 sizeBlock = 2LL * sizeHalfBlock;
2222 # pragma omp parallel \
2224 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, \
2225 numTasks,chunkId,chunkSize,controlQubit) \
2226 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2230 # pragma omp for schedule (static)
2232 for (thisTask=0; thisTask<numTasks; thisTask++) {
2234 thisBlock = thisTask / sizeHalfBlock;
2235 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2236 indexLo = indexUp + sizeHalfBlock;
2238 controlBit =
extractBit (controlQubit, indexUp+chunkId*chunkSize);
2241 stateRealUp = stateVecReal[indexUp];
2242 stateImagUp = stateVecImag[indexUp];
2244 stateRealLo = stateVecReal[indexLo];
2245 stateImagLo = stateVecImag[indexLo];
2248 stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
2249 - betaReal*stateRealLo - betaImag*stateImagLo;
2250 stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
2251 - betaReal*stateImagLo + betaImag*stateRealLo;
2254 stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
2255 + alphaReal*stateRealLo + alphaImag*stateImagLo;
2256 stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
2257 + alphaReal*stateImagLo - alphaImag*stateRealLo;
2269 Qureg qureg,
int targetQubit,
2270 long long int ctrlQubitsMask,
long long int ctrlFlipMask,
2273 long long int sizeBlock, sizeHalfBlock;
2274 long long int thisBlock,
2277 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2278 long long int thisTask;
2281 long long int chunkId=qureg.
chunkId;
2284 sizeHalfBlock = 1LL << targetQubit;
2285 sizeBlock = 2LL * sizeHalfBlock;
2292 # pragma omp parallel \
2294 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u, ctrlQubitsMask,ctrlFlipMask, \
2295 numTasks,chunkId,chunkSize) \
2296 private (thisTask,thisBlock, indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2300 # pragma omp for schedule (static)
2302 for (thisTask=0; thisTask<numTasks; thisTask++) {
2304 thisBlock = thisTask / sizeHalfBlock;
2305 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2306 indexLo = indexUp + sizeHalfBlock;
2311 if (ctrlQubitsMask == (ctrlQubitsMask & ((indexUp+chunkId*chunkSize) ^ ctrlFlipMask))) {
2313 stateRealUp = stateVecReal[indexUp];
2314 stateImagUp = stateVecImag[indexUp];
2316 stateRealLo = stateVecReal[indexLo];
2317 stateImagLo = stateVecImag[indexLo];
2320 stateVecReal[indexUp] = u.
real[0][0]*stateRealUp - u.
imag[0][0]*stateImagUp
2321 + u.
real[0][1]*stateRealLo - u.
imag[0][1]*stateImagLo;
2322 stateVecImag[indexUp] = u.
real[0][0]*stateImagUp + u.
imag[0][0]*stateRealUp
2323 + u.
real[0][1]*stateImagLo + u.
imag[0][1]*stateRealLo;
2326 stateVecReal[indexLo] = u.
real[1][0]*stateRealUp - u.
imag[1][0]*stateImagUp
2327 + u.
real[1][1]*stateRealLo - u.
imag[1][1]*stateImagLo;
2328 stateVecImag[indexLo] = u.
real[1][0]*stateImagUp + u.
imag[1][0]*stateRealUp
2329 + u.
real[1][1]*stateImagLo + u.
imag[1][1]*stateRealLo;
2339 long long int sizeBlock, sizeHalfBlock;
2340 long long int thisBlock,
2343 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2344 long long int thisTask;
2347 long long int chunkId=qureg.
chunkId;
2352 sizeHalfBlock = 1LL << targetQubit;
2353 sizeBlock = 2LL * sizeHalfBlock;
2360 # pragma omp parallel \
2362 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks,chunkId,chunkSize,controlQubit) \
2363 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2367 # pragma omp for schedule (static)
2369 for (thisTask=0; thisTask<numTasks; thisTask++) {
2371 thisBlock = thisTask / sizeHalfBlock;
2372 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2373 indexLo = indexUp + sizeHalfBlock;
2375 controlBit =
extractBit (controlQubit, indexUp+chunkId*chunkSize);
2378 stateRealUp = stateVecReal[indexUp];
2379 stateImagUp = stateVecImag[indexUp];
2381 stateRealLo = stateVecReal[indexLo];
2382 stateImagLo = stateVecImag[indexLo];
2386 stateVecReal[indexUp] = u.
real[0][0]*stateRealUp - u.
imag[0][0]*stateImagUp
2387 + u.
real[0][1]*stateRealLo - u.
imag[0][1]*stateImagLo;
2388 stateVecImag[indexUp] = u.
real[0][0]*stateImagUp + u.
imag[0][0]*stateRealUp
2389 + u.
real[0][1]*stateImagLo + u.
imag[0][1]*stateRealLo;
2392 stateVecReal[indexLo] = u.
real[1][0]*stateRealUp - u.
imag[1][0]*stateImagUp
2393 + u.
real[1][1]*stateRealLo - u.
imag[1][1]*stateImagLo;
2394 stateVecImag[indexLo] = u.
real[1][0]*stateImagUp + u.
imag[1][0]*stateRealUp
2395 + u.
real[1][1]*stateImagLo + u.
imag[1][1]*stateRealLo;
2416 ComplexArray stateVecUp,
2417 ComplexArray stateVecLo,
2418 ComplexArray stateVecOut)
2421 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2422 long long int thisTask;
2425 long long int chunkId=qureg.
chunkId;
2431 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2432 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2433 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2436 # pragma omp parallel \
2438 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2439 rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks,chunkId,chunkSize,controlQubit) \
2440 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2444 # pragma omp for schedule (static)
2446 for (thisTask=0; thisTask<numTasks; thisTask++) {
2447 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2450 stateRealUp = stateVecRealUp[thisTask];
2451 stateImagUp = stateVecImagUp[thisTask];
2453 stateRealLo = stateVecRealLo[thisTask];
2454 stateImagLo = stateVecImagLo[thisTask];
2457 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2458 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2478 ComplexArray stateVecUp,
2479 ComplexArray stateVecLo,
2480 ComplexArray stateVecOut)
2483 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2484 long long int thisTask;
2487 long long int chunkId=qureg.
chunkId;
2493 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2494 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2495 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2498 # pragma omp parallel \
2500 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2501 rot1Real,rot1Imag, rot2Real,rot2Imag, numTasks,chunkId,chunkSize,controlQubit) \
2502 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2506 # pragma omp for schedule (static)
2508 for (thisTask=0; thisTask<numTasks; thisTask++) {
2509 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2512 stateRealUp = stateVecRealUp[thisTask];
2513 stateImagUp = stateVecImagUp[thisTask];
2515 stateRealLo = stateVecRealLo[thisTask];
2516 stateImagLo = stateVecImagLo[thisTask];
2518 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2519 + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2520 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2521 + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2545 long long int ctrlQubitsMask,
long long int ctrlFlipMask,
2547 ComplexArray stateVecUp,
2548 ComplexArray stateVecLo,
2549 ComplexArray stateVecOut)
2552 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2553 long long int thisTask;
2556 long long int chunkId=qureg.
chunkId;
2560 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2561 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2562 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2565 # pragma omp parallel \
2567 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2568 rot1Real,rot1Imag, rot2Real,rot2Imag, ctrlQubitsMask,ctrlFlipMask, numTasks,chunkId,chunkSize) \
2569 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2573 # pragma omp for schedule (static)
2575 for (thisTask=0; thisTask<numTasks; thisTask++) {
2576 if (ctrlQubitsMask == (ctrlQubitsMask & ((thisTask+chunkId*chunkSize) ^ ctrlFlipMask))) {
2578 stateRealUp = stateVecRealUp[thisTask];
2579 stateImagUp = stateVecImagUp[thisTask];
2581 stateRealLo = stateVecRealLo[thisTask];
2582 stateImagLo = stateVecImagLo[thisTask];
2584 stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2585 + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2586 stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2587 + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2595 long long int sizeBlock, sizeHalfBlock;
2596 long long int thisBlock,
2599 qreal stateRealUp,stateImagUp;
2600 long long int thisTask;
2604 sizeHalfBlock = 1LL << targetQubit;
2605 sizeBlock = 2LL * sizeHalfBlock;
2612 # pragma omp parallel \
2614 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks) \
2615 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2619 # pragma omp for schedule (static)
2621 for (thisTask=0; thisTask<numTasks; thisTask++) {
2622 thisBlock = thisTask / sizeHalfBlock;
2623 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2624 indexLo = indexUp + sizeHalfBlock;
2626 stateRealUp = stateVecReal[indexUp];
2627 stateImagUp = stateVecImag[indexUp];
2629 stateVecReal[indexUp] = stateVecReal[indexLo];
2630 stateVecImag[indexUp] = stateVecImag[indexLo];
2632 stateVecReal[indexLo] = stateRealUp;
2633 stateVecImag[indexLo] = stateImagUp;
2652 ComplexArray stateVecIn,
2653 ComplexArray stateVecOut)
2656 long long int thisTask;
2659 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2660 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2663 # pragma omp parallel \
2665 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut,numTasks) \
2670 # pragma omp for schedule (static)
2672 for (thisTask=0; thisTask<numTasks; thisTask++) {
2673 stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2674 stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
2681 long long int sizeBlock, sizeHalfBlock;
2682 long long int thisBlock,
2685 qreal stateRealUp,stateImagUp;
2686 long long int thisTask;
2689 long long int chunkId=qureg.
chunkId;
2694 sizeHalfBlock = 1LL << targetQubit;
2695 sizeBlock = 2LL * sizeHalfBlock;
2702 # pragma omp parallel \
2704 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,numTasks,chunkId,chunkSize,controlQubit) \
2705 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
2709 # pragma omp for schedule (static)
2711 for (thisTask=0; thisTask<numTasks; thisTask++) {
2712 thisBlock = thisTask / sizeHalfBlock;
2713 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2714 indexLo = indexUp + sizeHalfBlock;
2716 controlBit =
extractBit(controlQubit, indexUp+chunkId*chunkSize);
2718 stateRealUp = stateVecReal[indexUp];
2719 stateImagUp = stateVecImag[indexUp];
2721 stateVecReal[indexUp] = stateVecReal[indexLo];
2722 stateVecImag[indexUp] = stateVecImag[indexLo];
2724 stateVecReal[indexLo] = stateRealUp;
2725 stateVecImag[indexLo] = stateImagUp;
2743 ComplexArray stateVecIn,
2744 ComplexArray stateVecOut)
2747 long long int thisTask;
2750 long long int chunkId=qureg.
chunkId;
2754 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2755 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2758 # pragma omp parallel \
2760 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2761 numTasks,chunkId,chunkSize,controlQubit) \
2762 private (thisTask,controlBit)
2766 # pragma omp for schedule (static)
2768 for (thisTask=0; thisTask<numTasks; thisTask++) {
2769 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
2771 stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2772 stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
2783 long long int globalOffset = qureg.
chunkId * numAmps;
2786 long long int ampInd, mateInd, globalInd;
2787 qreal mateRe, mateIm;
2790 # pragma omp parallel \
2792 shared (stateRe,stateIm, numAmps, ctrlMask,targMask, globalOffset) \
2793 private (ampInd, mateInd,mateRe,mateIm, globalInd)
2797 # pragma omp for schedule (static)
2799 for (ampInd = 0; ampInd < numAmps; ampInd++) {
2810 globalInd = ampInd + globalOffset;
2813 if (ctrlMask && ((ctrlMask & globalInd) != ctrlMask))
2816 mateInd = ampInd ^ targMask;
2819 if (mateInd < ampInd)
2822 mateRe = stateRe[mateInd];
2823 mateIm = stateIm[mateInd];
2826 stateRe[mateInd] = stateRe[ampInd];
2827 stateIm[mateInd] = stateIm[ampInd];
2828 stateRe[ampInd] = mateRe;
2829 stateIm[ampInd] = mateIm;
2835 Qureg qureg,
int ctrlMask,
int targMask,
2836 ComplexArray stateVecIn,
2837 ComplexArray stateVecOut
2840 long long int globalOffset = qureg.
chunkId * numAmps;
2846 qreal* inReal = stateVecIn.real;
2847 qreal* inImag = stateVecIn.imag;
2848 qreal* outReal = stateVecOut.real;
2849 qreal* outImag = stateVecOut.imag;
2851 long long int outInd, outIndGlobal, inInd, inIndGlobal;
2854 # pragma omp parallel \
2856 shared (inReal,inImag,outReal,outImag, numAmps,globalOffset, ctrlMask,targMask) \
2857 private (outInd,outIndGlobal, inInd,inIndGlobal)
2861 # pragma omp for schedule (static)
2863 for (outInd = 0; outInd < numAmps; outInd++) {
2866 outIndGlobal = outInd + globalOffset;
2867 if (ctrlMask && ((ctrlMask & outIndGlobal) != ctrlMask))
2878 inIndGlobal = outIndGlobal ^ targMask;
2879 inInd = inIndGlobal % numAmps;
2881 outReal[outInd] = inReal[inInd];
2882 outImag[outInd] = inImag[inInd];
2889 long long int sizeBlock, sizeHalfBlock;
2890 long long int thisBlock,
2893 qreal stateRealUp,stateImagUp;
2894 long long int thisTask;
2898 sizeHalfBlock = 1LL << targetQubit;
2899 sizeBlock = 2LL * sizeHalfBlock;
2906 # pragma omp parallel \
2908 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,conjFac) \
2909 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2913 # pragma omp for schedule (static)
2915 for (thisTask=0; thisTask<numTasks; thisTask++) {
2916 thisBlock = thisTask / sizeHalfBlock;
2917 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2918 indexLo = indexUp + sizeHalfBlock;
2920 stateRealUp = stateVecReal[indexUp];
2921 stateImagUp = stateVecImag[indexUp];
2923 stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
2924 stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
2925 stateVecReal[indexLo] = conjFac * -stateImagUp;
2926 stateVecImag[indexLo] = conjFac * stateRealUp;
2946 ComplexArray stateVecIn,
2947 ComplexArray stateVecOut,
2948 int updateUpper,
int conjFac)
2951 long long int thisTask;
2954 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2955 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2957 int realSign=1, imagSign=1;
2958 if (updateUpper) imagSign=-1;
2962 # pragma omp parallel \
2964 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2965 realSign,imagSign, numTasks,conjFac) \
2970 # pragma omp for schedule (static)
2972 for (thisTask=0; thisTask<numTasks; thisTask++) {
2973 stateVecRealOut[thisTask] = conjFac * realSign * stateVecImagIn[thisTask];
2974 stateVecImagOut[thisTask] = conjFac * imagSign * stateVecRealIn[thisTask];
2984 long long int sizeBlock, sizeHalfBlock;
2985 long long int thisBlock,
2988 qreal stateRealUp,stateImagUp;
2989 long long int thisTask;
2992 long long int chunkId=qureg.
chunkId;
2997 sizeHalfBlock = 1LL << targetQubit;
2998 sizeBlock = 2LL * sizeHalfBlock;
3005 # pragma omp parallel \
3007 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,chunkId, \
3008 chunkSize,controlQubit,conjFac) \
3009 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
3013 # pragma omp for schedule (static)
3015 for (thisTask=0; thisTask<numTasks; thisTask++) {
3016 thisBlock = thisTask / sizeHalfBlock;
3017 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3018 indexLo = indexUp + sizeHalfBlock;
3020 controlBit =
extractBit(controlQubit, indexUp+chunkId*chunkSize);
3022 stateRealUp = stateVecReal[indexUp];
3023 stateImagUp = stateVecImag[indexUp];
3026 stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
3027 stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
3028 stateVecReal[indexLo] = conjFac * -stateImagUp;
3029 stateVecImag[indexLo] = conjFac * stateRealUp;
3037 ComplexArray stateVecIn,
3038 ComplexArray stateVecOut,
int conjFac)
3041 long long int thisTask;
3044 long long int chunkId=qureg.
chunkId;
3048 qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
3049 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
3052 # pragma omp parallel \
3054 shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
3055 numTasks,chunkId,chunkSize,controlQubit,conjFac) \
3056 private (thisTask,controlBit)
3060 # pragma omp for schedule (static)
3062 for (thisTask=0; thisTask<numTasks; thisTask++) {
3063 controlBit =
extractBit (controlQubit, thisTask+chunkId*chunkSize);
3065 stateVecRealOut[thisTask] = conjFac * stateVecImagIn[thisTask];
3066 stateVecImagOut[thisTask] = conjFac * -stateVecRealIn[thisTask];
3080 long long int sizeBlock, sizeHalfBlock;
3081 long long int thisBlock,
3084 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
3085 long long int thisTask;
3089 sizeHalfBlock = 1LL << targetQubit;
3090 sizeBlock = 2LL * sizeHalfBlock;
3096 qreal recRoot2 = 1.0/sqrt(2);
3099 # pragma omp parallel \
3101 shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, recRoot2, numTasks) \
3102 private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
3106 # pragma omp for schedule (static)
3108 for (thisTask=0; thisTask<numTasks; thisTask++) {
3109 thisBlock = thisTask / sizeHalfBlock;
3110 indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3111 indexLo = indexUp + sizeHalfBlock;
3113 stateRealUp = stateVecReal[indexUp];
3114 stateImagUp = stateVecImag[indexUp];
3116 stateRealLo = stateVecReal[indexLo];
3117 stateImagLo = stateVecImag[indexLo];
3119 stateVecReal[indexUp] = recRoot2*(stateRealUp + stateRealLo);
3120 stateVecImag[indexUp] = recRoot2*(stateImagUp + stateImagLo);
3122 stateVecReal[indexLo] = recRoot2*(stateRealUp - stateRealLo);
3123 stateVecImag[indexLo] = recRoot2*(stateImagUp - stateImagLo);
3140 ComplexArray stateVecUp,
3141 ComplexArray stateVecLo,
3142 ComplexArray stateVecOut,
3146 qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
3147 long long int thisTask;
3151 if (updateUpper) sign=1;
3154 qreal recRoot2 = 1.0/sqrt(2);
3156 qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
3157 qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
3158 qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
3161 # pragma omp parallel \
3163 shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
3164 recRoot2, sign, numTasks) \
3165 private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
3169 # pragma omp for schedule (static)
3171 for (thisTask=0; thisTask<numTasks; thisTask++) {
3173 stateRealUp = stateVecRealUp[thisTask];
3174 stateImagUp = stateVecImagUp[thisTask];
3176 stateRealLo = stateVecRealLo[thisTask];
3177 stateImagLo = stateVecImagLo[thisTask];
3179 stateVecRealOut[thisTask] = recRoot2*(stateRealUp + sign*stateRealLo);
3180 stateVecImagOut[thisTask] = recRoot2*(stateImagUp + sign*stateImagLo);
3187 long long int index;
3188 long long int stateVecSize;
3192 long long int chunkId=qureg.
chunkId;
3199 qreal stateRealLo, stateImagLo;
3204 # pragma omp parallel for \
3206 shared (stateVecSize, stateVecReal,stateVecImag, cosAngle,sinAngle, \
3207 chunkId,chunkSize,targetQubit) \
3208 private (index,targetBit,stateRealLo,stateImagLo) \
3211 for (index=0; index<stateVecSize; index++) {
3214 targetBit =
extractBit (targetQubit, index+chunkId*chunkSize);
3217 stateRealLo = stateVecReal[index];
3218 stateImagLo = stateVecImag[index];
3220 stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3221 stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3228 long long int index;
3229 long long int stateVecSize;
3233 long long int chunkId=qureg.
chunkId;
3240 qreal stateRealLo, stateImagLo;
3241 qreal cosAngle = cos(angle);
3242 qreal sinAngle = sin(angle);
3245 # pragma omp parallel for \
3247 shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize, \
3248 idQubit1,idQubit2,cosAngle,sinAngle ) \
3249 private (index,bit1,bit2,stateRealLo,stateImagLo) \
3252 for (index=0; index<stateVecSize; index++) {
3253 bit1 =
extractBit (idQubit1, index+chunkId*chunkSize);
3254 bit2 =
extractBit (idQubit2, index+chunkId*chunkSize);
3257 stateRealLo = stateVecReal[index];
3258 stateImagLo = stateVecImag[index];
3260 stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3261 stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3268 long long int index;
3269 long long int stateVecSize;
3272 long long int chunkId=qureg.
chunkId;
3274 long long int mask =
getQubitBitMask(controlQubits, numControlQubits);
3280 qreal stateRealLo, stateImagLo;
3281 qreal cosAngle = cos(angle);
3282 qreal sinAngle = sin(angle);
3285 # pragma omp parallel \
3287 shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3288 private (index, stateRealLo, stateImagLo)
3292 # pragma omp for schedule (static)
3294 for (index=0; index<stateVecSize; index++) {
3295 if (mask == (mask & (index+chunkId*chunkSize)) ){
3297 stateRealLo = stateVecReal[index];
3298 stateImagLo = stateVecImag[index];
3300 stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3301 stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3311 mask = mask & (mask-1);
3318 long long int index;
3319 long long int stateVecSize;
3322 long long int chunkId=qureg.
chunkId;
3328 qreal stateReal, stateImag;
3329 qreal cosAngle = cos(angle/2.0);
3330 qreal sinAngle = sin(angle/2.0);
3337 # pragma omp parallel \
3339 shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3340 private (index, fac, stateReal, stateImag)
3344 # pragma omp for schedule (static)
3346 for (index=0; index<stateVecSize; index++) {
3347 stateReal = stateVecReal[index];
3348 stateImag = stateVecImag[index];
3352 stateVecReal[index] = cosAngle*stateReal + fac * sinAngle*stateImag;
3353 stateVecImag[index] = - fac * sinAngle*stateReal + cosAngle*stateImag;
3366 qreal stateReal, stateImag;
3367 qreal cosAngle = cos(angle/2.0);
3368 qreal sinAngle = sin(angle/2.0);
3373 long long int index, globalIndex;
3376 # pragma omp parallel \
3378 shared (offset, stateVecSize, stateVecReal,stateVecImag, ctrlMask,targMask, cosAngle,sinAngle) \
3379 private (index,globalIndex, fac, stateReal,stateImag)
3383 # pragma omp for schedule (static)
3385 for (index=0; index<stateVecSize; index++) {
3386 stateReal = stateVecReal[index];
3387 stateImag = stateVecImag[index];
3390 globalIndex = index + offset;
3391 if (ctrlMask && ((ctrlMask & globalIndex) != ctrlMask))
3396 stateVecReal[index] = cosAngle*stateReal + fac * sinAngle*stateImag;
3397 stateVecImag[index] = - fac * sinAngle*stateReal + cosAngle*stateImag;
3407 long long int diagSpacing = 1LL + densityDim;
3408 long long int maxNumDiagsPerChunk = 1 + localNumAmps / diagSpacing;
3409 long long int numPrevDiags = (qureg.
chunkId>0)? 1+(qureg.
chunkId*localNumAmps)/diagSpacing : 0;
3410 long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3411 long long int localIndNextDiag = globalIndNextDiag % localNumAmps;
3414 long long int numDiagsInThisChunk = maxNumDiagsPerChunk;
3415 if (localIndNextDiag + (numDiagsInThisChunk-1)*diagSpacing >= localNumAmps)
3416 numDiagsInThisChunk -= 1;
3418 long long int visitedDiags;
3419 long long int basisStateInd;
3420 long long int index;
3426 # pragma omp parallel \
3427 shared (localIndNextDiag, numPrevDiags, diagSpacing, stateVecReal, numDiagsInThisChunk) \
3428 private (visitedDiags, basisStateInd, index) \
3429 reduction ( +:zeroProb )
3433 # pragma omp for schedule (static)
3436 for (visitedDiags = 0; visitedDiags < numDiagsInThisChunk; visitedDiags++) {
3438 basisStateInd = numPrevDiags + visitedDiags;
3439 index = localIndNextDiag + diagSpacing * visitedDiags;
3441 if (
extractBit(measureQubit, basisStateInd) == 0)
3442 zeroProb += stateVecReal[index];
3461 long long int sizeBlock,
3464 long long int thisBlock,
3467 qreal totalProbability;
3469 long long int thisTask;
3475 sizeHalfBlock = 1LL << (measureQubit);
3477 sizeBlock = 2LL * sizeHalfBlock;
3480 totalProbability = 0.0;
3486 # pragma omp parallel \
3487 shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag) \
3488 private (thisTask,thisBlock,index) \
3489 reduction ( +:totalProbability )
3493 # pragma omp for schedule (static)
3495 for (thisTask=0; thisTask<numTasks; thisTask++) {
3496 thisBlock = thisTask / sizeHalfBlock;
3497 index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3499 totalProbability += stateVecReal[index]*stateVecReal[index]
3500 + stateVecImag[index]*stateVecImag[index];
3503 return totalProbability;
3515 qreal totalProbability;
3517 long long int thisTask;
3525 totalProbability = 0.0;
3531 # pragma omp parallel \
3532 shared (numTasks,stateVecReal,stateVecImag) \
3533 private (thisTask) \
3534 reduction ( +:totalProbability )
3538 # pragma omp for schedule (static)
3540 for (thisTask=0; thisTask<numTasks; thisTask++) {
3541 totalProbability += stateVecReal[thisTask]*stateVecReal[thisTask]
3542 + stateVecImag[thisTask]*stateVecImag[thisTask];
3546 return totalProbability;
3560 long long int numOutcomeProbs = (1 << numQubits);
3565 # pragma omp parallel \
3567 shared (numOutcomeProbs,outcomeProbs) \
3572 # pragma omp for schedule (static)
3574 for (j=0; j<numOutcomeProbs; j++)
3575 outcomeProbs[j] = 0;
3584 long long int outcomeInd;
3589 # pragma omp parallel \
3590 shared (numTasks,offset, qubits,numQubits, stateRe,stateIm, outcomeProbs) \
3591 private (i, q, outcomeInd, prob)
3595 # pragma omp for schedule (static)
3598 for (i=0; i<numTasks; i++) {
3602 for (q=0; q<numQubits; q++)
3603 outcomeInd +=
extractBit(qubits[q], i + offset) * (1LL << q);
3605 prob = stateRe[i]*stateRe[i] + stateIm[i]*stateIm[i];
3611 outcomeProbs[outcomeInd] += prob;
3619 long long int numOutcomeProbs = (1 << numQubits);
3623 # pragma omp parallel \
3625 shared (numOutcomeProbs,outcomeProbs) \
3630 # pragma omp for schedule (static)
3632 for (j=0; j<numOutcomeProbs; j++)
3633 outcomeProbs[j] = 0;
3639 long long int diagSpacing = 1LL + densityDim;
3640 long long int maxNumDiagsPerChunk = 1 + localNumAmps / diagSpacing;
3641 long long int numPrevDiags = (qureg.
chunkId>0)? 1+(qureg.
chunkId*localNumAmps)/diagSpacing : 0;
3642 long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3643 long long int localIndNextDiag = globalIndNextDiag % localNumAmps;
3646 long long int numDiagsInThisChunk = maxNumDiagsPerChunk;
3647 if (localIndNextDiag + (numDiagsInThisChunk-1)*diagSpacing >= localNumAmps)
3648 numDiagsInThisChunk -= 1;
3650 long long int visitedDiags;
3651 long long int basisStateInd;
3652 long long int index;
3655 long long int outcomeInd;
3659 # pragma omp parallel \
3660 shared (localIndNextDiag, numPrevDiags, diagSpacing, stateRe, numDiagsInThisChunk, qubits,numQubits, outcomeProbs) \
3661 private (visitedDiags, basisStateInd, index, q,outcomeInd)
3665 # pragma omp for schedule (static)
3668 for (visitedDiags = 0; visitedDiags < numDiagsInThisChunk; visitedDiags++) {
3670 basisStateInd = numPrevDiags + visitedDiags;
3671 index = localIndNextDiag + diagSpacing * visitedDiags;
3675 for (q=0; q<numQubits; q++)
3676 outcomeInd +=
extractBit(qubits[q], basisStateInd) * (1LL << q);
3682 outcomeProbs[outcomeInd] += stateRe[index];
3689 long long int index;
3690 long long int stateVecSize;
3694 long long int chunkId=qureg.
chunkId;
3702 # pragma omp parallel for \
3704 shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize,idQubit1,idQubit2 ) \
3705 private (index,bit1,bit2) \
3708 for (index=0; index<stateVecSize; index++) {
3709 bit1 =
extractBit (idQubit1, index+chunkId*chunkSize);
3710 bit2 =
extractBit (idQubit2, index+chunkId*chunkSize);
3712 stateVecReal [index] = - stateVecReal [index];
3713 stateVecImag [index] = - stateVecImag [index];
3720 long long int index;
3721 long long int stateVecSize;
3724 long long int chunkId=qureg.
chunkId;
3726 long long int mask =
getQubitBitMask(controlQubits, numControlQubits);
3733 # pragma omp parallel \
3735 shared (stateVecSize, stateVecReal,stateVecImag, mask, chunkId,chunkSize ) \
3740 # pragma omp for schedule (static)
3742 for (index=0; index<stateVecSize; index++) {
3743 if (mask == (mask & (index+chunkId*chunkSize)) ){
3744 stateVecReal [index] = - stateVecReal [index];
3745 stateVecImag [index] = - stateVecImag [index];
3770 long long int sizeBlock,
3773 long long int thisBlock,
3778 long long int thisTask;
3785 sizeHalfBlock = 1LL << (measureQubit);
3787 sizeBlock = 2LL * sizeHalfBlock;
3789 renorm=1/sqrt(totalProbability);
3795 # pragma omp parallel \
3797 shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,renorm,outcome) \
3798 private (thisTask,thisBlock,index)
3804 # pragma omp for schedule (static)
3806 for (thisTask=0; thisTask<numTasks; thisTask++) {
3807 thisBlock = thisTask / sizeHalfBlock;
3808 index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3809 stateVecReal[index]=stateVecReal[index]*renorm;
3810 stateVecImag[index]=stateVecImag[index]*renorm;
3812 stateVecReal[index+sizeHalfBlock]=0;
3813 stateVecImag[index+sizeHalfBlock]=0;
3818 # pragma omp for schedule (static)
3820 for (thisTask=0; thisTask<numTasks; thisTask++) {
3821 thisBlock = thisTask / sizeHalfBlock;
3822 index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3823 stateVecReal[index]=0;
3824 stateVecImag[index]=0;
3826 stateVecReal[index+sizeHalfBlock]=stateVecReal[index+sizeHalfBlock]*renorm;
3827 stateVecImag[index+sizeHalfBlock]=stateVecImag[index+sizeHalfBlock]*renorm;
3852 long long int thisTask;
3855 qreal renorm=1/sqrt(totalProbability);
3861 # pragma omp parallel \
3862 shared (numTasks,stateVecReal,stateVecImag) \
3867 # pragma omp for schedule (static)
3869 for (thisTask=0; thisTask<numTasks; thisTask++) {
3870 stateVecReal[thisTask] = stateVecReal[thisTask]*renorm;
3871 stateVecImag[thisTask] = stateVecImag[thisTask]*renorm;
3890 long long int thisTask;
3901 # pragma omp parallel \
3902 shared (numTasks,stateVecReal,stateVecImag) \
3907 # pragma omp for schedule (static)
3909 for (thisTask=0; thisTask<numTasks; thisTask++) {
3910 stateVecReal[thisTask] = 0;
3911 stateVecImag[thisTask] = 0;
3929 long long int thisTask;
3930 long long int ind00, ind01, ind10;
3935 # pragma omp parallel \
3937 shared (reVec,imVec,numTasks,qb1,qb2) \
3938 private (thisTask, ind00,ind01,ind10, re01,re10, im01,im10)
3942 # pragma omp for schedule (static)
3944 for (thisTask=0; thisTask<numTasks; thisTask++) {
3951 re01 = reVec[ind01]; im01 = imVec[ind01];
3952 re10 = reVec[ind10]; im10 = imVec[ind10];
3955 reVec[ind01] = re10; reVec[ind10] = re01;
3956 imVec[ind01] = im10; imVec[ind10] = im01;
3974 long long int globalStartInd = qureg.
chunkId * numLocalAmps;
3975 long long int pairGlobalStartInd = pairRank * numLocalAmps;
3977 long long int localInd, globalInd;
3978 long long int pairLocalInd, pairGlobalInd;
3981 # pragma omp parallel \
3983 shared (reVec,imVec,rePairVec,imPairVec,numLocalAmps,globalStartInd,pairGlobalStartInd,qb1,qb2) \
3984 private (localInd,globalInd, pairLocalInd,pairGlobalInd)
3988 # pragma omp for schedule (static)
3990 for (localInd=0; localInd < numLocalAmps; localInd++) {
3992 globalInd = globalStartInd + localInd;
3996 pairLocalInd = pairGlobalInd - pairGlobalStartInd;
3998 reVec[localInd] = rePairVec[pairLocalInd];
3999 imVec[localInd] = imPairVec[pairLocalInd];
4023 qreal re1,im1, re2,im2, reOut,imOut;
4024 long long int index;
4027 # pragma omp parallel \
4028 shared (vecRe1,vecIm1, vecRe2,vecIm2, vecReOut,vecImOut, facRe1,facIm1,facRe2,facIm2, numAmps) \
4029 private (index, re1,im1, re2,im2, reOut,imOut)
4033 # pragma omp for schedule (static)
4035 for (index=0LL; index<numAmps; index++) {
4036 re1 = vecRe1[index]; im1 = vecIm1[index];
4037 re2 = vecRe2[index]; im2 = vecIm2[index];
4038 reOut = vecReOut[index];
4039 imOut = vecImOut[index];
4041 vecReOut[index] = (facReOut*reOut - facImOut*imOut) + (facRe1*re1 - facIm1*im1) + (facRe2*re2 - facIm2*im2);
4042 vecImOut[index] = (facReOut*imOut + facImOut*reOut) + (facRe1*im1 + facIm1*re1) + (facRe2*im2 + facIm2*re2);
4058 long long int index;
4061 # pragma omp parallel \
4062 shared (stateRe,stateIm, opRe,opIm, numAmps) \
4063 private (index, a,b,c,d)
4067 # pragma omp for schedule (static)
4069 for (index=0LL; index<numAmps; index++) {
4076 stateRe[index] = a*c - b*d;
4077 stateIm[index] = a*d + b*c;
4100 long long int index;
4103 # pragma omp parallel \
4104 shared (stateRe,stateIm, opRe,opIm, numAmps,opDim) \
4105 private (index, a,b,c,d)
4109 # pragma omp for schedule (static)
4111 for (index=0LL; index<numAmps; index++) {
4114 c = opRe[index % opDim];
4115 d = opIm[index % opDim];
4118 stateRe[index] = a*c - b*d;
4119 stateIm[index] = a*d + b*c;
4129 long long int index;
4136 qreal vecRe,vecIm,vecAbs, opRe, opIm;
4139 # pragma omp parallel \
4140 shared (stateReal, stateImag, opReal, opImag, numAmps) \
4141 private (index, vecRe,vecIm,vecAbs, opRe,opIm) \
4142 reduction ( +:expecRe, expecIm )
4146 # pragma omp for schedule (static)
4148 for (index=0; index < numAmps; index++) {
4149 vecRe = stateReal[index];
4150 vecIm = stateImag[index];
4151 opRe = opReal[index];
4152 opIm = opImag[index];
4155 vecAbs = vecRe*vecRe + vecIm*vecIm;
4156 expecRe += vecAbs*opRe;
4157 expecIm += vecAbs*opIm;
4162 innerProd.
real = expecRe;
4163 innerProd.
imag = expecIm;
4179 long long int globalIndNextDiag = diagSpacing * numPrevDiags;
4180 long long int localIndNextDiag = globalIndNextDiag % qureg.
numAmpsPerChunk;
4191 long long int stateInd;
4192 long long int opInd;
4193 qreal matRe, matIm, opRe, opIm;
4198 # pragma omp parallel \
4199 shared (stateReal,stateImag, opReal,opImag, localIndNextDiag,diagSpacing,numAmps) \
4200 private (stateInd,opInd, matRe,matIm, opRe,opIm) \
4201 reduction ( +:expecRe, expecIm )
4205 # pragma omp for schedule (static)
4207 for (stateInd=localIndNextDiag; stateInd < numAmps; stateInd += diagSpacing) {
4209 matRe = stateReal[stateInd];
4210 matIm = stateImag[stateInd];
4211 opInd = (stateInd - localIndNextDiag) / diagSpacing;
4212 opRe = opReal[opInd];
4213 opIm = opImag[opInd];
4217 expecRe += matRe * opRe - matIm * opIm;
4218 expecIm += matRe * opIm + matIm * opRe;
4223 expecVal.
real = expecRe;
4224 expecVal.
imag = expecIm;
4233 long long int localEndInd = localStartInd + numElems;
4239 if (localStartInd < 0)
4246 long long int index;
4251 # pragma omp parallel \
4253 shared (localStartInd,localEndInd, vecRe,vecIm, real,imag, offset) \
4258 # pragma omp for schedule (static)
4261 for (index=localStartInd; index < localEndInd; index++) {
4262 vecRe[index] = real[index + offset];
4263 vecIm[index] = imag[index + offset];
4270 qreal* coeffs,
qreal* exponents,
int numTerms,
4271 long long int* overrideInds,
qreal* overridePhases,
int numOverrides,
4283 long long int index, globalAmpInd, phaseInd;
4285 qreal phase, c, s, re, im;
4288 # pragma omp parallel \
4290 shared (chunkId,numAmps, stateRe,stateIm, qubits,numQubits,encoding, coeffs,exponents,numTerms, overrideInds,overridePhases,numOverrides, conj) \
4291 private (index, globalAmpInd, phaseInd, i,t,q, phase, c,s,re,im)
4295 # pragma omp for schedule (static)
4297 for (index=0LL; index<numAmps; index++) {
4300 globalAmpInd = chunkId * numAmps + index;
4305 for (q=0; q<numQubits; q++)
4306 phaseInd += (1LL << q) *
extractBit(qubits[q], globalAmpInd);
4309 for (q=0; q<numQubits-1; q++)
4310 phaseInd += (1LL << q) *
extractBit(qubits[q], globalAmpInd);
4311 if (
extractBit(qubits[numQubits-1], globalAmpInd) == 1)
4312 phaseInd -= (1LL << (numQubits-1));
4316 for (i=0; i<numOverrides; i++)
4317 if (phaseInd == overrideInds[i])
4322 if (i < numOverrides)
4323 phase = overridePhases[i];
4325 for (t=0; t<numTerms; t++)
4326 phase += coeffs[t] * pow(phaseInd, exponents[t]);
4335 re = stateRe[index];
4336 im = stateIm[index];
4339 stateRe[index] = re*c - im*s;
4340 stateIm[index] = re*s + im*c;
4346 Qureg qureg,
int* qubits,
int* numQubitsPerReg,
int numRegs,
enum bitEncoding encoding,
4347 qreal* coeffs,
qreal* exponents,
int* numTermsPerReg,
4348 long long int* overrideInds,
qreal* overridePhases,
int numOverrides,
4362 long long int index, globalAmpInd;
4363 int r, q, i, t, found, flatInd;
4364 qreal phase, c, s, re, im;
4370 # pragma omp parallel \
4372 shared (chunkId,numAmps, stateRe,stateIm, qubits,numQubitsPerReg,numRegs,encoding, coeffs,exponents,numTermsPerReg, overrideInds,overridePhases,numOverrides, conj) \
4373 private (index,globalAmpInd, r,q,i,t,flatInd, found, phaseInds,phase, c,s,re,im)
4377 # pragma omp for schedule (static)
4379 for (index=0LL; index<numAmps; index++) {
4382 globalAmpInd = chunkId * numAmps + index;
4386 for (r=0; r<numRegs; r++) {
4390 for (q=0; q<numQubitsPerReg[r]; q++)
4391 phaseInds[r] += (1LL << q) *
extractBit(qubits[flatInd++], globalAmpInd);
4394 for (q=0; q<numQubitsPerReg[r]-1; q++)
4395 phaseInds[r] += (1LL << q) *
extractBit(qubits[flatInd++], globalAmpInd);
4397 if (
extractBit(qubits[flatInd++], globalAmpInd) == 1)
4398 phaseInds[r] -= (1LL << (numQubitsPerReg[r]-1));
4403 for (i=0; i<numOverrides; i++) {
4405 for (r=0; r<numRegs; r++) {
4406 if (phaseInds[r] != overrideInds[i*numRegs+r]) {
4417 if (i < numOverrides)
4418 phase = overridePhases[i];
4421 for (r=0; r<numRegs; r++) {
4422 for (t=0; t<numTermsPerReg[r]; t++) {
4423 phase += coeffs[flatInd] * pow(phaseInds[r], exponents[flatInd]);
4436 re = stateRe[index];
4437 im = stateIm[index];
4440 stateRe[index] = re*c - im*s;
4441 stateIm[index] = re*s + im*c;
4447 Qureg qureg,
int* qubits,
int* numQubitsPerReg,
int numRegs,
enum bitEncoding encoding,
4449 qreal* params,
int numParams,
4450 long long int* overrideInds,
qreal* overridePhases,
int numOverrides,
4464 long long int index, globalAmpInd;
4465 int r, q, i, found, flatInd;
4466 qreal phase, norm, prod, dist, c, s, re, im;
4472 # pragma omp parallel \
4474 shared (chunkId,numAmps, stateRe,stateIm, qubits,numQubitsPerReg,numRegs,encoding, phaseFuncName,params,numParams, overrideInds,overridePhases,numOverrides, conj) \
4475 private (index,globalAmpInd, r,q,i,flatInd, found, phaseInds,phase,norm,prod,dist, c,s,re,im)
4479 # pragma omp for schedule (static)
4481 for (index=0LL; index<numAmps; index++) {
4484 globalAmpInd = chunkId * numAmps + index;
4488 for (r=0; r<numRegs; r++) {
4492 for (q=0; q<numQubitsPerReg[r]; q++)
4493 phaseInds[r] += (1LL << q) *
extractBit(qubits[flatInd++], globalAmpInd);
4496 for (q=0; q<numQubitsPerReg[r]-1; q++)
4497 phaseInds[r] += (1LL << q) *
extractBit(qubits[flatInd++], globalAmpInd);
4499 if (
extractBit(qubits[flatInd++], globalAmpInd) == 1)
4500 phaseInds[r] -= (1LL << (numQubitsPerReg[r]-1));
4505 for (i=0; i<numOverrides; i++) {
4507 for (r=0; r<numRegs; r++) {
4508 if (phaseInds[r] != overrideInds[i*numRegs+r]) {
4519 if (i < numOverrides)
4520 phase = overridePhases[i];
4529 for (r=0; r<numRegs; r++)
4530 norm += (phaseInds[r] - params[2+r])*(phaseInds[r] - params[2+r]);
4533 for (r=0; r<numRegs; r++)
4534 norm += phaseInds[r]*phaseInds[r];
4537 if (phaseFuncName ==
NORM)
4540 phase = (norm == 0.)? params[0] : 1/norm;
4542 phase = params[0] * norm;
4544 phase = (norm <= REAL_EPS)? params[1] : params[0] / norm;
4551 for (r=0; r<numRegs; r++)
4552 prod *= phaseInds[r];
4557 phase = (prod == 0.)? params[0] : 1/prod;
4559 phase = params[0] * prod;
4561 phase = (prod == 0.)? params[1] : params[0] / prod;
4570 for (r=0; r<numRegs; r+=2)
4571 dist += (phaseInds[r] - phaseInds[r+1] - params[2+r/2])*(phaseInds[r] - phaseInds[r+1] - params[2+r/2]);
4574 for (r=0; r<numRegs; r+=2)
4575 dist += (phaseInds[r+1] - phaseInds[r])*(phaseInds[r+1] - phaseInds[r]);
4581 phase = (dist == 0.)? params[0] : 1/dist;
4583 phase = params[0] * dist;
4585 phase = (dist <= REAL_EPS)? params[1] : params[0] / dist;
4596 re = stateRe[index];
4597 im = stateIm[index];
4600 stateRe[index] = re*c - im*s;
4601 stateIm[index] = re*s + im*c;