AMD GCN: Implement circular buffering.
The GCN port outputs stdout and stderr via a shared-memory interface.
Previously the buffer was limited to 1000 write operations, which was
enough for testing purposes but easy to exhaust.

This patch implements a new circular buffering system allowing a greater
amount of output.  The interface must allow hundreds of hardware threads
to output simultaneously.  The new limit is UINT32_MAX write operations.

Unfortunately, there's no way to tell if the host side has also been
updated.  This code will misbehave unless the gcn-run from GCC is also
updated (although it's fine the other way around), but that patch has
already been committed.

OK?

Andrew Stubbs
Mentor Graphics / CodeSourcery
parent 38322b9bf6
commit 62c66a39bd
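For context, the sketch below shows what a host-side consumer of this buffer
might look like.  It is an illustration only: the hypothetical drain_output
helper assumes the struct output layout defined in the patch, handles only the
type-3 (raw) records produced by write (), and accesses the buffer through a
plain pointer, whereas the real consumer is gcn-run in GCC, which also handles
the formatted-output record types.

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical drain loop (sketch only); "data" points at the device's
       struct output as laid out in the patch below.  Records are consumed
       strictly in order, and each slot is released by clearing "written"
       and advancing the absolute "consumed" counter.  */
    static void
    drain_output (struct output *data)
    {
      unsigned int consumed = data->consumed;

      while (__atomic_load_n (&data->queue[consumed % 1024].written,
                              __ATOMIC_ACQUIRE))
        {
          struct printf_data *slot = &data->queue[consumed % 1024];

          /* Type 3 (raw): up to 128 bytes in msg plus up to 128 bytes in
             text; neither is NUL-terminated when completely full.  */
          fwrite (slot->msg, 1, strnlen (slot->msg, 128), stdout);
          fwrite (slot->text, 1, strnlen (slot->text, 128), stdout);

          /* Free the slot for reuse, then publish the new count.  */
          __atomic_store_n (&slot->written, 0, __ATOMIC_RELEASE);
          __atomic_store_n (&data->consumed, ++consumed, __ATOMIC_RELEASE);
        }
    }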
@@ -26,10 +26,14 @@
 
    The next_output counter must be atomically incremented for each
    print output.  Only when the print data is fully written can the
-   "written" flag be set. */
+   "written" flag be set.
+
+   The buffer is circular; the host increments the consumed counter
+   and clears the written flag as it goes, opening up slots for reuse.
+   The counters always use absolute numbers. */
 struct output {
   int return_value;
-  int next_output;
+  unsigned int next_output;
   struct printf_data {
     int written;
     char msg[128];
@@ -39,7 +43,8 @@ struct output {
       double dvalue;
       char text[128];
     };
-  } queue[1000];
+  } queue[1024];
+  unsigned int consumed;
 };
 
 _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count)
@@ -55,33 +60,49 @@ _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count)
   struct output *data = (struct output *)kernargs[2];
 
   /* Each output slot allows 256 bytes, so reserve as many as we need. */
-  int slot_count = ((count+1)/256)+1;
-  int index = __atomic_fetch_add (&data->next_output, slot_count,
-                                  __ATOMIC_ACQUIRE);
+  unsigned int slot_count = ((count+1)/256)+1;
+  unsigned int index = __atomic_fetch_add (&data->next_output, slot_count,
+                                           __ATOMIC_ACQUIRE);
+
+  if ((unsigned int)(index + slot_count) < data->consumed)
+    {
+      /* Overflow. */
+      errno = EFBIG;
+      return 0;
+    }
+
   for (int c = count;
-       c >= 0 && index < 1000;
+       c >= 0;
        buf += 256, c -= 256, index++)
     {
+      unsigned int slot = index % 1024;
+
+      /* Spinlock while the host catches up. */
+      if (index >= 1024)
+        while (__atomic_load_n (&data->consumed, __ATOMIC_ACQUIRE)
+               <= (index - 1024))
+          asm ("s_sleep 64");
+
       if (c < 128)
         {
-          memcpy (data->queue[index].msg, buf, c);
-          data->queue[index].msg[c] = '\0';
-          data->queue[index].text[0] = '\0';
+          memcpy (data->queue[slot].msg, buf, c);
+          data->queue[slot].msg[c] = '\0';
+          data->queue[slot].text[0] = '\0';
         }
       else if (c < 256)
        {
-          memcpy (data->queue[index].msg, buf, 128);
-          memcpy (data->queue[index].text, buf+128, c-128);
-          data->queue[index].text[c-128] = '\0';
+          memcpy (data->queue[slot].msg, buf, 128);
+          memcpy (data->queue[slot].text, buf+128, c-128);
+          data->queue[slot].text[c-128] = '\0';
         }
       else
         {
-          memcpy (data->queue[index].msg, buf, 128);
-          memcpy (data->queue[index].text, buf+128, 128);
+          memcpy (data->queue[slot].msg, buf, 128);
+          memcpy (data->queue[slot].text, buf+128, 128);
         }
 
-      data->queue[index].type = 3; /* Raw. */
-      __atomic_store_n (&data->queue[index].written, 1, __ATOMIC_RELEASE);
+      data->queue[slot].type = 3; /* Raw. */
+      __atomic_store_n (&data->queue[slot].written, 1, __ATOMIC_RELEASE);
     }
 
   return count;
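To make the absolute-counter arithmetic above concrete, here is a small,
self-contained illustration.  The index value is arbitrary; only the
1024-slot queue size comes from the patch.

    #include <stdio.h>

    /* Illustration of the wrap-around arithmetic in write (): absolute
       write indices map onto 1024 physical slots, and a writer must wait
       until the host's "consumed" counter has passed the previous
       occupant of its slot.  */
    int
    main (void)
    {
      unsigned int index = 2050;            /* absolute index reserved by a writer */
      unsigned int slot = index % 1024;     /* physical slot: 2050 % 1024 == 2 */

      /* The spinlock condition is consumed <= index - 1024, so this writer
         may proceed once consumed exceeds 1026, i.e. once absolute record
         1026 (the previous occupant of slot 2) has been consumed.  */
      unsigned int previous_occupant = index - 1024;

      printf ("index %u -> slot %u; wait until consumed > %u\n",
              index, slot, previous_occupant);
      return 0;
    }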