compiler: fix missing parallel omp flag for nested #2932
Open
mloubout wants to merge 1 commit into
Open
Conversation
Codecov Report✅ All modified and coverable lines are covered by tests. Additional details and impacted files@@ Coverage Diff @@
## main #2932 +/- ##
=======================================
Coverage 83.35% 83.35%
=======================================
Files 248 248
Lines 51734 51734
Branches 4463 4463
=======================================
+ Hits 43122 43124 +2
+ Misses 7859 7858 -1
+ Partials 753 752 -1
Flags with carried forward coverage won't be shown. Click here to find out more. ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
JDBetteridge
approved these changes
May 14, 2026
Contributor
|
Hmm, I'm still getting a failure with this branch... |
Contributor
|
It's possible that it is unrelated now passes, but is failing here |
Contributor
|
Generated code: print(op.ccode)
/* Devito generated code for Operator `Kernel` */
/* Feature-test macro; must be defined before the system headers are included. */
#define _POSIX_C_SOURCE 200809L
/* START(S): declares the timeval pair start_S / end_S in the current scope and
 * stores the current time in start_S via gettimeofday. */
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
/* STOP(S,T): stores the current time in end_S and adds the elapsed time since
 * START(S), in seconds (tv_sec difference plus tv_usec difference / 1e6),
 * to the field T->S. Must be used in the same scope as the matching START(S). */
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;
/* MAX(a,b): the larger of a and b. Each argument appears twice in the
 * expansion, so arguments with side effects may be evaluated more than once. */
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "omp.h"
/* Descriptor for one array argument passed to the generated kernel.
 * Only `data` and `size` are read by the Kernel shown in this paste; the
 * meaning of the remaining fields is not visible here. */
struct dataobj
{
/* Base pointer of the array; Kernel casts it to a multi-dimensional float array. */
void *restrict data;
/* Per-dimension extents; Kernel uses size[1], size[2], size[3] as array bounds. */
int * size;
/* presumably the total allocation size in bytes -- TODO confirm */
unsigned long nbytes;
/* Not referenced in the visible code. */
unsigned long * npsize;
/* Not referenced in the visible code. */
unsigned long * dsize;
/* Not referenced in the visible code. */
int * hsize;
/* Not referenced in the visible code. */
int * hofs;
/* Not referenced in the visible code. */
int * oofs;
/* Not referenced in the visible code. */
void * dmap;
} ;
/* Timing accumulators filled in by the STOP macro. */
struct profiler
{
/* Accumulated elapsed seconds of the code between START(section0) and
 * STOP(section0,timers) in Kernel; incremented once per time step. */
double section0;
} ;
/* Kernel: for every time step in [time_m, time_M] and every point p_u in
 * [p_u_m, p_u_M], adds a weighted copy of u[time][p_u] to the 2x2x2 block of
 * grid cells surrounding the point's coordinates.
 *
 * u_vec, u_coords_vec : array descriptors (see struct dataobj).
 * x_m..z_M            : inclusive index bounds used in the in-range check.
 * h_x, h_y, h_z       : divisors applied to the origin-shifted coordinates.
 * o_x, o_y, o_z       : offsets subtracted from the coordinates.
 * nthreads_nonaffine  : thread count of the outer parallel region.
 * nthreads_nested     : thread count of the nested parallel loop.
 * timers              : receives the accumulated section0 time.
 * Returns 0 unconditionally.
 *
 * NOTE(review): as pasted, `u_vec` appears twice in the parameter list and
 * `u` is declared twice below with different ranks (2-D and 4-D), which is
 * not valid C. Presumably two distinct objects lost their distinguishing
 * names in this paste -- confirm against the actual op.ccode output. */
int Kernel(struct dataobj *restrict u_vec, struct dataobj *restrict u_vec, struct dataobj *restrict u_coords_vec, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const float h_x, const float h_y, const float h_z, const float o_x, const float o_y, const float o_z, const int p_u_M, const int p_u_m, const int time_M, const int time_m, const int nthreads_nested, const int nthreads_nonaffine, struct profiler * timers)
{
/* Reinterpret the flat data pointers as multi-dimensional float arrays whose
 * inner extents come from the descriptors' size[] entries. */
float (*restrict u)[u_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]]) u_vec->data;
float (*restrict u)[u_vec->size[1]][u_vec->size[2]][u_vec->size[3]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]][u_vec->size[3]]) u_vec->data;
float (*restrict u_coords)[u_coords_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[u_coords_vec->size[1]]) u_coords_vec->data;
/* t0 is time modulo 2 and selects the first index of the 4-D array written below. */
for (int time = time_m, t0 = (time)%(2); time <= time_M; time += 1, t0 = (time)%(2))
{
START(section0)
/* Outer parallel region: the p_u loop is shared among nthreads_nonaffine threads. */
#pragma omp parallel num_threads(nthreads_nonaffine)
{
/* Dynamic-schedule chunk: one third of the per-thread share of the p_u range, but at least 1. */
int chunk_size = (int)(MAX(1, (int)((1.0/3.0)*(p_u_M - p_u_m + 1)/nthreads_nonaffine)));
#pragma omp for schedule(dynamic,chunk_size)
for (int p_u = p_u_m; p_u <= p_u_M; p_u += 1)
{
/* rp_ux, rp_uy, rp_uz in {0,1} enumerate the eight neighbouring cells. */
for (int rp_ux = 0; rp_ux <= 1; rp_ux += 1)
{
/* Nested parallel region over the collapsed (rp_uy, rp_uz) loops. */
#pragma omp parallel for collapse(2) schedule(static,1) num_threads(nthreads_nested)
for (int rp_uy = 0; rp_uy <= 1; rp_uy += 1)
{
for (int rp_uz = 0; rp_uz <= 1; rp_uz += 1)
{
/* Integer cell index of the point along each axis: floor((coord - origin) / spacing). */
int posx = (int)(floorf((-o_x + u_coords[p_u][0])/h_x));
int posy = (int)(floorf((-o_y + u_coords[p_u][1])/h_y));
int posz = (int)(floorf((-o_z + u_coords[p_u][2])/h_z));
/* Fractional position of the point inside that cell along each axis. */
float px = -floorf((-o_x + u_coords[p_u][0])/h_x) + (-o_x + u_coords[p_u][0])/h_x;
float py = -floorf((-o_y + u_coords[p_u][1])/h_y) + (-o_y + u_coords[p_u][1])/h_y;
float pz = -floorf((-o_z + u_coords[p_u][2])/h_z) + (-o_z + u_coords[p_u][2])/h_z;
/* Skip neighbours that fall more than one cell outside [x_m, x_M] x [y_m, y_M] x [z_m, z_M]. */
if (rp_ux + posx >= x_m - 1 && rp_uy + posy >= y_m - 1 && rp_uz + posz >= z_m - 1 && rp_ux + posx <= x_M + 1 && rp_uy + posy <= y_M + 1 && rp_uz + posz <= z_M + 1)
{
/* Per-axis weight is p for the upper neighbour (rp = 1) and 1 - p for the
 * lower one (rp = 0); the product of the three scales the point's value. */
float r0 = (rp_ux*px + (1 - rp_ux)*(1 - px))*(rp_uy*py + (1 - rp_uy)*(1 - py))*(rp_uz*pz + (1 - rp_uz)*(1 - pz))*u[time][p_u];
/* Atomic because different points / threads may accumulate into the same cell.
 * The + 1 index shift is presumably an array padding offset -- TODO confirm. */
#pragma omp atomic update
u[t0][rp_ux + posx + 1][rp_uy + posy + 1][rp_uz + posz + 1] += r0;
}
}
}
}
}
}
STOP(section0,timers)
}
return 0;
} |
Contributor
Author
|
OK, so for this one it makes sense that it fails on ppc, since the test checks the pragmas (which only ppc supports with nested parallelism). So maybe add a skipif('ppc'). Can you list all the tests that fail on ppc in addition to this one? |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
No description provided.