Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ImCreate_cube.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ int main()
yc = y0 + r*sin(angle);


imarray.md->write = 1; // set this flag to 1 when writing data
SHMIM_WRITE_ACQUIRE(imarray.md);

index = imarray.md->cnt1 +1;
if(index == imarray.md->size[2])
Expand All @@ -146,13 +146,13 @@ int main()
// imarray.array.F[jj*imarray.md->size[0]+ii] = 0.0f;
}
imarray.md->cnt1 = index;
imarray.md->cnt0++;
SHMIM_CNT0_INCREMENT(imarray.md);
clock_gettime(CLOCK_ISIO, &imarray.md[0].lastaccesstime);

// POST ALL SEMAPHORES
ImageStreamIO_sempost(&imarray, -1);

imarray.md->write = 0; // Done writing data
SHMIM_WRITE_RELEASE(imarray.md);

usleep(dtus);
angle += dangle;
Expand Down
22 changes: 15 additions & 7 deletions ImCreate_img.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,25 @@ int main()

free(imsize);

strcpy(imarray.kw[0].name, "keyword_long");
snprintf(imarray.kw[0].name,
sizeof(imarray.kw[0].name),
"%s", "keyword_long");
imarray.kw[0].type = 'L';
imarray.kw[0].value.numl = 42;

strcpy(imarray.kw[1].name, "keyword_float");
snprintf(imarray.kw[1].name,
sizeof(imarray.kw[1].name),
"%s", "keyword_float");
imarray.kw[1].type = 'D';
imarray.kw[1].value.numf = 3.141592;

strcpy(imarray.kw[2].name, "keyword_string");
snprintf(imarray.kw[2].name,
sizeof(imarray.kw[2].name),
"%s", "keyword_string");
imarray.kw[2].type = 'S';
strcpy(imarray.kw[2].value.valstr, "Hello!");
snprintf(imarray.kw[2].value.valstr,
sizeof(imarray.kw[2].value.valstr),
"%s", "Hello!");

float angle;
float r;
Expand Down Expand Up @@ -109,7 +117,7 @@ int main()
yc = y0 + r*sin(angle);


imarray.md->write = 1; // set this flag to 1 when writing data
SHMIM_WRITE_ACQUIRE(imarray.md);

for(ii=0; ii<imarray.md->size[0]; ii++)
for(jj=0; jj<imarray.md->size[1]; jj++)
Expand All @@ -126,11 +134,11 @@ int main()
// imarray.array.F[jj*imarray.md->size[0]+ii] = 0.0f;
}
imarray.md->cnt1 = 0;
imarray.md->cnt0++;
SHMIM_CNT0_INCREMENT(imarray.md);
// POST ALL SEMAPHORES
ImageStreamIO_sempost(&imarray, -1);

imarray.md->write = 0; // Done writing data
SHMIM_WRITE_RELEASE(imarray.md);

usleep(dtus);
angle += dangle;
Expand Down
6 changes: 3 additions & 3 deletions ImCreate_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ int main()
angle += dangle;
if(angle > 2.0*M_PI) { angle -= 2.0 * M_PI; }

imarray->md->write = 1; // Poor-man's mutex when writing
SHMIM_WRITE_ACQUIRE(imarray->md);

// ->array is union; ->array.F is float pointer to image
float* dotF = imarray->array.F;
Expand All @@ -88,8 +88,8 @@ int main()
// Post all semaphores (index = -1)
ImageStreamIO_sempost(imarray, -1);

imarray->md->write = 0; // Done writing; release mutex
imarray->md->cnt0++;
SHMIM_WRITE_RELEASE(imarray->md);
SHMIM_CNT0_INCREMENT(imarray->md);
imarray->md->cnt1++;
Comment on lines 88 to 93
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here ImageStreamIO_sempost() and SHMIM_WRITE_RELEASE() happen before SHMIM_CNT0_INCREMENT(). If any reader uses cnt0 to confirm/sequence updates after being woken, it can observe a new frame with the previous cnt0 value. Consider updating cnt0 (and any other metadata) before posting semaphores, and clearing write as the last step to publish a fully consistent update.

Copilot uses AI. Check for mistakes.

usleep(dtus); // Wait 1ms
Expand Down
2 changes: 1 addition & 1 deletion ImCreate_test_gpuipc.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ int main()

printf("ImCreate_test_gpuipc is waiting update\n");
ImageStreamIO_semwait(&imarray[0], 0);
while(imarray[0].md[0].write );
while(SHMIM_WRITE_LOAD(&imarray[0].md[0]));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The syntax imarray[0].md[0] is confusing when either of the things indexed [0] is NOT an array and should be removed whenever encountered.


printf("ImCreate_test_gpuipc reads in SHM\n");
cudaMemcpy(h_ptr, d_ptr, imsize[0]*imsize[1]*sizeof(float),
Expand Down
4 changes: 2 additions & 2 deletions ImCreate_test_gpuipc2.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ int main()
printf("ImCreate_test_gpuipc2 is sending update\n");
ImageStreamIO_sempost(&imarray[0], -1);

imarray[0].md[0].write = 0; // Done writing data
imarray[0].md[0].cnt0++;
SHMIM_WRITE_RELEASE(&imarray[0].md[0]); // Done writing data
SHMIM_CNT0_INCREMENT(&imarray[0].md[0]);
imarray[0].md[0].cnt1++;

free(imarray);
Expand Down
10 changes: 5 additions & 5 deletions ImageStreamIO.c
Original file line number Diff line number Diff line change
Expand Up @@ -1592,7 +1592,7 @@ __attribute__((cold)) errno_t ImageStreamIO_createIm_gpu(
clock_gettime(CLOCK_ISIO, &image->md->lastaccesstime);
clock_gettime(CLOCK_ISIO, &image->md->creationtime);

image->md->write = 0;
SHMIM_WRITE_RELEASE(image->md);
image->md->cnt0 = 0;
image->md->cnt1 = 0;
Comment on lines +1595 to 1597
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During image creation, write is released before cnt0/cnt1 are initialized, and cnt0 is still assigned non-atomically. If another process maps the SHM concurrently, it can observe write==0 with partially initialized metadata. Consider initializing counters first (ideally via an atomic store helper) and only then clearing the write flag as the final “publish ready” step.

Suggested change
SHMIM_WRITE_RELEASE(image->md);
image->md->cnt0 = 0;
image->md->cnt1 = 0;
__atomic_store_n(&image->md->cnt0, 0, __ATOMIC_RELAXED);
__atomic_store_n(&image->md->cnt1, 0, __ATOMIC_RELAXED);
SHMIM_WRITE_RELEASE(image->md);

Copilot uses AI. Check for mistakes.

Expand Down Expand Up @@ -2353,8 +2353,8 @@ long ImageStreamIO_UpdateIm_atime( IMAGE *image,
image->md->atime = *atime;
}

image->md->cnt0++;
image->md->write = 0;
SHMIM_CNT0_INCREMENT(image->md);
SHMIM_WRITE_RELEASE(image->md);

#ifdef IMAGESTRUCT_WRITEHISTORY
// Update image write history
Expand Down Expand Up @@ -2414,12 +2414,12 @@ long ImageStreamIO_UpdateIm( IMAGE *image )
**/
long ImageStreamIO_BusywaitForNoWrite(IMAGE *image, int acquire)
{
while(image->md->write)
while(SHMIM_WRITE_LOAD(image->md))
{
}
Comment on lines +2417 to 2419
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This busy-wait loop performs an acquire load on every iteration with an empty body. On weak-memory architectures this can be significantly more expensive than necessary. Consider spinning with a relaxed load (optionally with a CPU pause/yield/backoff), then performing a single acquire fence/load when the flag transitions to 0.

Copilot uses AI. Check for mistakes.
if(acquire)
{
image->md->write = 1;
SHMIM_WRITE_ACQUIRE(image->md);
}
return IMAGESTREAMIO_SUCCESS;
}
5 changes: 5 additions & 0 deletions ImageStreamIO_config.h.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#undef PROJECT_NAME
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those should NOT be undefined, they're critical to how IIRC pkginfo works, and at least to how version information is distributed within the ISIO package.

These throw warning during compilation because we're using it wrong, since we use this file to broadcast IMAGESTRUCT_VERSION, which is a mistake.

IMAGESTRUCT_VERSION should be part of the public API of the project.
This file should be fully private and in time we should fix this.

#define PROJECT_NAME "@PROJECT_NAME@"
#undef VERSION_MAJOR
#define VERSION_MAJOR @VERSION_MAJOR@
#undef VERSION_MINOR
#define VERSION_MINOR @VERSION_MINOR@
#undef VERSION_PATCH
#define VERSION_PATCH @VERSION_PATCH@
#undef VERSION_OPTION
#define VERSION_OPTION "@VERSION_OPTION@"
#define IMAGESTRUCT_VERSION "@VERSION_MAJOR@.@VERSION_MINOR@"
90 changes: 89 additions & 1 deletion ImageStruct.h
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The entire addition is pretty verbose in a file that's pretty big. Can we start another header file with all the weird optimizations separated?
Also, the info " * On x86-64 (TSO), release/acquire compile to plain

  • mov instructions plus a compiler barrier — zero
  • runtime overhead. On ARM/other weak-memory
  • architectures, appropriate dmb fences are emitted." and the like really isn't necessary.
    The entire diff could be <15 lines here...

Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,18 @@ typedef struct
uint64_t cnt1; /**< in 3D rolling buffer image, this is the last slice written */
uint64_t cnt2; /**< in cnt2-based syncronization, proceed until cnt0=cnt2 */

uint8_t write; /**< 1 if image is being written */
/**
* Cross-process write mutex flag.
*
* Writers set to 1 before copying data, then to 0
* after. Readers spin on it to avoid partial frames.
*
* volatile prevents the compiler from caching the
* value in a register (critical for spin loops).
* Use SHMIM_WRITE_* macros for proper CPU memory
* fences via atomic builtins.
*/
volatile uint8_t write;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

inline comment is enough info?



uint16_t NBkw; /**< number of keywords (max: 65536) */
Expand Down Expand Up @@ -563,6 +574,83 @@ typedef struct /**< structure used to store data arrays */
} IMAGE;


/*
* =========================================================
* Atomic accessors for IMAGE_METADATA fields
* =========================================================
*
* These macros provide proper CPU memory fences for
* cross-process synchronization of shared memory fields.
*
* On x86-64 (TSO), release/acquire compile to plain
* mov instructions plus a compiler barrier — zero
* runtime overhead. On ARM/other weak-memory
* architectures, appropriate dmb fences are emitted.
*
* Usage:
* SHMIM_WRITE_ACQUIRE(md) — set write=1
* SHMIM_WRITE_RELEASE(md) — set write=0
* SHMIM_WRITE_LOAD(md) — read write flag
* SHMIM_CNT0_INCREMENT(md) — atomically cnt0++
* SHMIM_CNT0_LOAD(md) — read cnt0
*/

#if defined(__STDC_VERSION__) \
&& __STDC_VERSION__ >= 201112L \
&& !defined(__STDC_NO_ATOMICS__)
#include <stdatomic.h>

#define SHMIM_WRITE_ACQUIRE(md) \
atomic_store_explicit( \
(_Atomic uint8_t *)&(md)->write, \
1, memory_order_release)
Comment on lines +598 to +606
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the C11 branch, these macros cast non-atomic struct members to _Atomic * and then use atomic_* APIs on them. Per the C standard this is undefined behavior unless the object itself has an atomic type (and it also relies on the atomic type’s alignment matching the field). Consider using the __atomic builtins for both branches (they are defined on non-atomic objects) or otherwise ensure the underlying object is truly atomic without changing the SHM layout.

Copilot uses AI. Check for mistakes.
Comment on lines +603 to +606
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SHMIM_WRITE_ACQUIRE() currently performs a store with release semantics. Release does not prevent subsequent pixel-data stores from becoming visible before the write=1 store on weakly ordered CPUs, so a reader could still observe write==0 while data writes have started. To make the “set write=1 before copying” contract reliable, add an ordering barrier between setting write=1 and subsequent data writes (e.g., store-relaxed + thread-fence-release, or an equivalent pattern) and consider renaming to avoid implying memory_order_acquire.

Copilot uses AI. Check for mistakes.

#define SHMIM_WRITE_RELEASE(md) \
atomic_store_explicit( \
(_Atomic uint8_t *)&(md)->write, \
0, memory_order_release)

#define SHMIM_WRITE_LOAD(md) \
atomic_load_explicit( \
(_Atomic uint8_t *)&(md)->write, \
memory_order_acquire)

#define SHMIM_CNT0_INCREMENT(md) \
atomic_fetch_add_explicit( \
(_Atomic uint64_t *)&(md)->cnt0, \
1, memory_order_release)

#define SHMIM_CNT0_LOAD(md) \
atomic_load_explicit( \
(_Atomic uint64_t *)&(md)->cnt0, \
memory_order_acquire)

#else
/* GCC/Clang __atomic builtins fallback */

#define SHMIM_WRITE_ACQUIRE(md) \
__atomic_store_n( \
&(md)->write, 1, __ATOMIC_RELEASE)

#define SHMIM_WRITE_RELEASE(md) \
__atomic_store_n( \
&(md)->write, 0, __ATOMIC_RELEASE)

#define SHMIM_WRITE_LOAD(md) \
__atomic_load_n( \
&(md)->write, __ATOMIC_ACQUIRE)

#define SHMIM_CNT0_INCREMENT(md) \
__atomic_add_fetch( \
Copy link

Copilot AI Apr 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SHMIM_CNT0_INCREMENT() returns different values depending on the compilation path: atomic_fetch_add_explicit returns the previous value, while __atomic_add_fetch returns the new value. This can create subtle bugs if callers ever use the macro’s value. Use consistent primitives across branches (e.g., fetch_add in both) and/or make the macro explicitly void-like to discourage using its return value.

Suggested change
__atomic_add_fetch( \
__atomic_fetch_add( \

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarify this API and find which model is right

&(md)->cnt0, 1, __ATOMIC_RELEASE)

#define SHMIM_CNT0_LOAD(md) \
__atomic_load_n( \
&(md)->cnt0, __ATOMIC_ACQUIRE)

#endif


#ifdef __cplusplus
} // extern "C"
#endif
Expand Down