diff --git a/newlib/ChangeLog b/newlib/ChangeLog index 56b53c022..6d4895b61 100644 --- a/newlib/ChangeLog +++ b/newlib/ChangeLog @@ -1,3 +1,10 @@ +2008-11-27 Ken Werner + + * libc/machine/spu/Makefile.am: Add spu-mcount.S spu-gmon.c. + * libc/machine/spu/Makefile.in: Regenerated. + * libc/machine/spu/spu-gmon.c: New file. + * libc/machine/spu/spu-mcount.S: New file. + 2008-11-27 Joel Sherrill * configure.host (*-rtems*): Turn on using portion of unix subdirectory. diff --git a/newlib/libc/machine/spu/Makefile.am b/newlib/libc/machine/spu/Makefile.am index 8a192dd0a..60831457e 100644 --- a/newlib/libc/machine/spu/Makefile.am +++ b/newlib/libc/machine/spu/Makefile.am @@ -31,7 +31,8 @@ lib_a_SOURCES += calloc_ea.c free_ea.c malloc_ea.c memchr_ea.c memcmp_ea.c \ munmap_ea.c posix_memalign_ea.c realloc_ea.c strcat_ea.c strchr_ea.c \ strcmp_ea.c strcpy_ea.c strcspn_ea.c strlen_ea.c strncat_ea.c strncmp_ea.c \ strncpy_ea.c strpbrk_ea.c strrchr_ea.c strspn_ea.c strstr_ea.c read_ea.c \ - pread_ea.c readv_ea.c write_ea.c pwrite_ea.c writev_ea.c + pread_ea.c readv_ea.c write_ea.c pwrite_ea.c writev_ea.c spu-mcount.S \ + spu-gmon.c endif lib_a_CCASFLAGS = $(AM_CCASFLAGS) diff --git a/newlib/libc/machine/spu/Makefile.in b/newlib/libc/machine/spu/Makefile.in index 723ec4085..2862f1fc4 100644 --- a/newlib/libc/machine/spu/Makefile.in +++ b/newlib/libc/machine/spu/Makefile.in @@ -41,7 +41,8 @@ host_triplet = @host@ @HAVE_SPU_EA_TRUE@ munmap_ea.c posix_memalign_ea.c realloc_ea.c strcat_ea.c strchr_ea.c \ @HAVE_SPU_EA_TRUE@ strcmp_ea.c strcpy_ea.c strcspn_ea.c strlen_ea.c strncat_ea.c strncmp_ea.c \ @HAVE_SPU_EA_TRUE@ strncpy_ea.c strpbrk_ea.c strrchr_ea.c strspn_ea.c strstr_ea.c read_ea.c \ -@HAVE_SPU_EA_TRUE@ pread_ea.c readv_ea.c write_ea.c pwrite_ea.c writev_ea.c +@HAVE_SPU_EA_TRUE@ pread_ea.c readv_ea.c write_ea.c pwrite_ea.c writev_ea.c spu-mcount.S \ +@HAVE_SPU_EA_TRUE@ spu-gmon.c DIST_COMMON = $(srcdir)/../../../../config.guess \ $(srcdir)/../../../../config.sub 
$(srcdir)/Makefile.in \ @@ -102,7 +103,7 @@ DIST_COMMON = $(srcdir)/../../../../config.guess \ $(srcdir)/../../../../compile $(srcdir)/../../../../compile \ $(srcdir)/../../../../compile $(srcdir)/../../../../compile \ $(srcdir)/../../../../compile $(srcdir)/../../../../compile \ - $(srcdir)/../../../../compile + $(srcdir)/../../../../compile $(srcdir)/../../../../compile subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../../../acinclude.m4 \ @@ -149,7 +150,9 @@ lib_a_LIBADD = @HAVE_SPU_EA_TRUE@ lib_a-readv_ea.$(OBJEXT) \ @HAVE_SPU_EA_TRUE@ lib_a-write_ea.$(OBJEXT) \ @HAVE_SPU_EA_TRUE@ lib_a-pwrite_ea.$(OBJEXT) \ -@HAVE_SPU_EA_TRUE@ lib_a-writev_ea.$(OBJEXT) +@HAVE_SPU_EA_TRUE@ lib_a-writev_ea.$(OBJEXT) \ +@HAVE_SPU_EA_TRUE@ lib_a-spu-mcount.$(OBJEXT) \ +@HAVE_SPU_EA_TRUE@ lib_a-spu-gmon.$(OBJEXT) am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-assert.$(OBJEXT) \ lib_a-clearerr.$(OBJEXT) lib_a-creat.$(OBJEXT) \ lib_a-fclose.$(OBJEXT) lib_a-feof.$(OBJEXT) \ @@ -506,6 +509,12 @@ lib_a-spu_timer_flih.o: spu_timer_flih.S lib_a-spu_timer_flih.obj: spu_timer_flih.S $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-spu_timer_flih.obj `if test -f 'spu_timer_flih.S'; then $(CYGPATH_W) 'spu_timer_flih.S'; else $(CYGPATH_W) '$(srcdir)/spu_timer_flih.S'; fi` +lib_a-spu-mcount.o: spu-mcount.S + $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-spu-mcount.o `test -f 'spu-mcount.S' || echo '$(srcdir)/'`spu-mcount.S + +lib_a-spu-mcount.obj: spu-mcount.S + $(CCAS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-spu-mcount.obj `if test -f 'spu-mcount.S'; then $(CYGPATH_W) 'spu-mcount.S'; else $(CYGPATH_W) '$(srcdir)/spu-mcount.S'; fi` + .c.o: $(COMPILE) -c $< @@ -1177,6 +1186,12 @@ lib_a-writev_ea.o: writev_ea.c lib_a-writev_ea.obj: writev_ea.c $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-writev_ea.obj `if test -f 'writev_ea.c'; then $(CYGPATH_W) 'writev_ea.c'; else $(CYGPATH_W) 
'$(srcdir)/writev_ea.c'; fi` + +lib_a-spu-gmon.o: spu-gmon.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-spu-gmon.o `test -f 'spu-gmon.c' || echo '$(srcdir)/'`spu-gmon.c + +lib_a-spu-gmon.obj: spu-gmon.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-spu-gmon.obj `if test -f 'spu-gmon.c'; then $(CYGPATH_W) 'spu-gmon.c'; else $(CYGPATH_W) '$(srcdir)/spu-gmon.c'; fi` uninstall-info-am: ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) diff --git a/newlib/libc/machine/spu/spu-gmon.c b/newlib/libc/machine/spu/spu-gmon.c new file mode 100644 index 000000000..527dfc66c --- /dev/null +++ b/newlib/libc/machine/spu/spu-gmon.c @@ -0,0 +1,419 @@ +/* +(C) Copyright IBM Corp. 2008 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of IBM nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +Author: Ken Werner +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Magic cookie. */ +#define GMON_MAGIC_COOKIE "gmon" + +/* Version number. */ +#define GMON_VERSION 1 + +/* Fraction of text space to allocate for histogram counters. */ +#define HISTFRACTION 4 + +/* Histogram counter type. */ +#define HISTCOUNTER unsigned short + +/* Fraction of text space to allocate for "from" hash buckets. HASHFRACTION is + based on the minimum number of bytes of separation between two subroutine + call points in the object code. */ +#define HASHFRACTION 4 + +/* Percent of text space to allocate for tostructs with a minimum. */ +#define ARCDENSITY 3 + +/* Minimal amount of arcs. */ +#define MINARCS 50 + +/* Rounding macros. */ +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) +#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y)) + +/* Sampling rate in Hertz. */ +#define SAMPLE_INTERVAL 100 + +/* Tag definitions for the gmon.out sub headers. 
*/ +#define GMON_TAG_TIME_HIST 0 +#define GMON_TAG_CG_ARC 1 + +struct tostruct +{ + uintptr_t selfpc; + long count; + unsigned short link; +}; + +struct gmon_hdr +{ + char cookie[4]; + int32_t version; + char spare[3 * 4]; +}; + +struct gmon_hist_hdr +{ + uintptr_t low_pc; + uintptr_t high_pc; + int32_t hist_size; + int32_t prof_rate; + char dimen[15]; + char dimen_abbrev; +} __attribute__ ((packed)); + +struct rawarc +{ + uintptr_t raw_frompc; + uintptr_t raw_selfpc; + long raw_count; +} __attribute__ ((packed)); + +/* start and end of the text section */ +extern char _start; +extern char _etext; + +/* froms are indexing tos */ +static __ea unsigned short *froms; +static __ea struct tostruct *tos = 0; +static long tolimit = 0; +static uintptr_t s_lowpc = 0; +static uintptr_t s_highpc = 0; +static unsigned long s_textsize = 0; + +static int fd; +static int hist_size; +static int timer_id; + +void +__sample (int id) +{ + unsigned int pc; + unsigned int pc_backup; + off_t offset; + unsigned short val; + + if (id != timer_id) + return; + + /* Fetch program counter. */ + pc = spu_read_srr0 () & ~3; + pc_backup = pc; + if (pc < s_lowpc || pc > s_highpc) + return; + pc -= (uintptr_t) & _start; + offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr) + + 1 + sizeof (struct gmon_hist_hdr); + + /* Read, increment and write the counter. */ + if (pread (fd, &val, 2, offset) != 2) + { + perror ("can't read the histogram"); + return; + } + if (val < USHRT_MAX) + ++val; + if (pwrite (fd, &val, 2, offset) != 2) + { + perror ("can't write the histogram"); + } +} + +static void +write_histogram (int fd) +{ + struct gmon_hist_hdr hist_hdr; + u_char tag = GMON_TAG_TIME_HIST; + hist_hdr.low_pc = s_lowpc; + hist_hdr.high_pc = s_highpc; + hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins. */ + hist_hdr.prof_rate = 100; /* Hertz. 
*/ + strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); + hist_hdr.dimen_abbrev = 's'; + struct iovec iov[2] = { + {&tag, sizeof (tag)}, + {&hist_hdr, sizeof (struct gmon_hist_hdr)} + }; + if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag)) + perror ("can't write the histogram header"); + + /* Skip the already written histogram data. */ + lseek (fd, hist_size, SEEK_CUR); +} + +static void +write_callgraph (int fd) +{ + int fromindex, endfrom; + uintptr_t frompc; + int toindex; + struct rawarc rawarc; + u_char tag = GMON_TAG_CG_ARC; + endfrom = s_textsize / (HASHFRACTION * sizeof (*froms)); + for (fromindex = 0; fromindex < endfrom; ++fromindex) + { + if (froms[fromindex]) + { + frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms)); + for (toindex = froms[fromindex]; toindex != 0; + toindex = tos[toindex].link) + { + rawarc.raw_frompc = frompc; + rawarc.raw_selfpc = tos[toindex].selfpc; + rawarc.raw_count = tos[toindex].count; + struct iovec iov[2] = { + {&tag, sizeof (tag)}, + {&rawarc, sizeof (struct rawarc)} + }; + if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc)) + perror ("can't write the callgraph"); + } + } + } +} + +void +__mcleanup (void) +{ + struct gmon_hdr ghdr; + + /* Disable sampling. */ + spu_timer_stop (timer_id); + spu_timer_free (timer_id); + spu_clock_stop (); + + /* Jump to the beginning of the gmon.out file. */ + if (lseek (fd, 0, SEEK_SET) == -1) + { + perror ("Cannot seek to the beginning of the gmon.out file."); + close (fd); + return; + } + + /* Write the gmon.out header. */ + memset (&ghdr, '\0', sizeof (struct gmon_hdr)); + memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie)); + ghdr.version = GMON_VERSION; + if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1) + { + perror ("Cannot write the gmon header to the gmon.out file."); + close (fd); + return; + } + + /* Write the sampling buffer (histogram). */ + write_histogram (fd); + + /* Write the call graph. 
*/ + write_callgraph (fd); + + close (fd); +} + +void +__monstartup (void) +{ + s_lowpc = + ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER)); + s_highpc = + ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER)); + s_textsize = s_highpc - s_lowpc; + + hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER); + + /* Allocate froms. */ + froms = malloc_ea (s_textsize / HASHFRACTION); + if (froms == NULL) + { + fprintf (stderr, "Cannot allocate ea memory for the froms array.\n"); + return; + } + memset_ea (froms, 0, s_textsize / HASHFRACTION); + + /* Determine tolimit. */ + tolimit = s_textsize * ARCDENSITY / 100; + if (tolimit < MINARCS) + tolimit = MINARCS; + + /* Allocate tos. */ + tos = malloc_ea (tolimit * sizeof (struct tostruct)); + if (tos == NULL) + { + fprintf (stderr, "Cannot allocate ea memory for the tos array.\n"); + return; + } + memset_ea (tos, 0, tolimit * sizeof (struct tostruct)); + + /* Open the gmon.out file. */ + fd = open ("gmon.out", O_RDWR | O_CREAT | O_TRUNC, 0644); + if (fd == -1) + { + perror ("can't open gmon.out file"); + return; + } + /* Truncate the file up to the size where the histogram fits in. */ + if (ftruncate (fd, + sizeof (struct gmon_hdr) + 1 + sizeof (struct gmon_hist_hdr) + hist_size) == + -1) + perror ("can't truncate the gmon.out file"); + + /* Start the histogram sampler. */ + spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih); + timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, + __sample); + spu_clock_start (); + spu_timer_start (timer_id); + + atexit (__mcleanup); +} + +void +__mcount_internal (uintptr_t frompc, uintptr_t selfpc) +{ + /* sefpc: the address of the function just entered. */ + /* frompc: the caller of the function just entered. */ + unsigned int mach_stat; + __ea unsigned short *frompcindex; + unsigned short toindex; + __ea struct tostruct *top; + __ea struct tostruct *prevtop; + + /* Save current state and disable interrupts. 
*/ + mach_stat = spu_readch(SPU_RdMachStat); + spu_idisable (); + + /* Sanity checks. */ + if (frompc < s_lowpc || frompc > s_highpc) + goto done; + frompc -= s_lowpc; + if (frompc > s_textsize) + goto done; + + /* frompc indexes into the froms array the value at that position indexes + into the tos array. */ + frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))]; + toindex = *frompcindex; + if (toindex == 0) + { + /* First time traversing this arc link of tos[0] incremented. */ + toindex = ++tos[0].link; + /* Sanity check. */ + if (toindex >= tolimit) + { + --tos[0].link; + goto done; + } + /* Save the index into the froms array for the next time we traverse this arc. */ + *frompcindex = toindex; + top = &tos[toindex]; + /* Sets the address of the function just entered. */ + top->selfpc = selfpc; + top->count = 1; + top->link = 0; + goto done; + } + + /* toindex points to a tostruct */ + top = &tos[toindex]; + if (top->selfpc == selfpc) + { + /* The arc is at front of the chain. This is the most common case. */ + top->count++; + goto done; + } + + /* top->selfpc != selfpc + The pc we have got is not the pc we already stored (i.e. multiple function + calls to the same fuction within a function. The arc is not at front of + the chain. */ + for (;;) + { + if (top->link == 0) + { + /* We are at the end of the chain and selfpc was not found. Thus we create + a new tostruct and link it to the head of the chain. */ + toindex = ++tos[0].link; + /* Sanity check. */ + if (toindex >= tolimit) + { + --tos[0].link; + goto done; + } + top = &tos[toindex]; + top->selfpc = selfpc; + top->count = 1; + /* Link back to the old tos entry. */ + top->link = *frompcindex; + /* Store a link to the new top in the froms array which makes the + current tos head of the chain. */ + *frompcindex = toindex; + goto done; + } + else + { + /* Otherwise check the next arc on the chain. 
*/ + prevtop = top; + top = &tos[top->link]; + if (top->selfpc == selfpc) + { + /* selfpc matches; increment its count. */ + top->count++; + /* Move it to the head of the chain. */ + /* Save previous tos index. */ + toindex = prevtop->link; + /* Link the former to to the current tos. */ + prevtop->link = top->link; + /* Link back to the old tos entry. */ + top->link = *frompcindex; + /* Store a link to the new top in the froms array which makes the + current tos head of the chain. */ + *frompcindex = toindex; + goto done; + } + } + } +done: + /* Enable interrupts if necessary. */ + if (__builtin_expect (mach_stat & 1, 0)) + spu_ienable (); +} diff --git a/newlib/libc/machine/spu/spu-mcount.S b/newlib/libc/machine/spu/spu-mcount.S new file mode 100644 index 000000000..624dc7645 --- /dev/null +++ b/newlib/libc/machine/spu/spu-mcount.S @@ -0,0 +1,93 @@ +/* +(C) Copyright IBM Corp. 2008 + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of IBM nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
/* _mcount extracts the address of the function just entered and the address
   of the caller of that function and then calls __mcount_internal. The
   prologue calls _mcount without saving any registers. The return address
   is stored in $75. The _mcount function has to:
    - create a new stack frame
    - save registers $2 to $75 on the stack
    - copy the two addresses ($0 and $75) into the argument registers $3 and $4
    - call __mcount_internal
    - restore registers
    - return to $75 */

/* The following two convenience macros assist in the coding of the
   saving and restoring of the registers. Each register N is kept in the
   quadword at offset N*16 from the stack pointer; the macros expand
   recursively, one instruction per register.

   saveregs     first, last    Saves registers from first up to last.
   restoreregs  first, last    Restores registers from last down to first.

   Note: first must be less than or equal to last. */
.macro	saveregs	first, last
	stqd		$\first, \first*16($SP)
.if	\last-\first
	saveregs	"(\first+1)",\last
.endif
.endm

.macro	restoreregs	first, last
	lqd		$\last, \last*16($SP)
.if	\last-\first
	restoreregs	\first,"(\last-1)"
.endif
.endm

/* _mcount needs to be resident since the overlay manager uses the scratch
   registers too. */
.text
	.align	3	/* 8 byte alignment. */
	.global	_mcount
	.type	_mcount, @function

_mcount:
	stqd	$lr, 16($sp)	/* Save link register in the callers stack frame. */
	stqd	$sp, -1216($sp)	/* Store back pointer: the old stack pointer
				   becomes the back chain of the new frame. */
	il	$lr, -1216	/* Push a new stack frame. */
	a	$sp, $sp, $lr	/* Frame size: 16 * (74 + 2) = 1216. */

	/* Save registers $2 to $75 on the stack. */
	saveregs	2, 75

	/* Bring the __mcount_internal arguments in place. */
	lqd	$3, 1232($sp)	/* frompc (the link register saved above:
				   1216 + 16). */
	ori	$4, $75, 0	/* selfpc (the gcc prologue puts "brsl $75, _mcount" in
				   front of every function). */
	brsl	$lr, __mcount_internal

	/* Restore register $2 to $75 from the stack. */
	restoreregs	2, 75

	il	$lr, 1216
	a	$sp, $sp, $lr	/* Pop the stack frame. */
	lqd	$lr, 16($sp)	/* Restore link register. */
	bi	$75		/* Branch to the called function. */
	.size	_mcount, .-_mcount