infiniband component

infiniband component

Postby nsmeds » Fri Mar 08, 2013 7:30 am

Hi,

I have started playing a bit with components and tried the infiniband component. I am not an ibverbs programmer or even a network coder, so I am having some problems getting off the ground. I am using 5.1.0-2.

First, I have a problem in that the node I am testing on has two IB ports, but only one is active. Initially papi_component_avail failed because of this. I made the attached patch to get beyond that, but I cannot get an example such as papi_command_line infiniband:::mlx4_0_1_recv to work either, despite or because of my patches ;-)

(gdb) where
#0 0x00000039c4e081a0 in mad_get_retries () from /usr/lib64/libibmad.so.5
#1 0x00000039c4e0a06b in mad_rpc () from /usr/lib64/libibmad.so.5
#2 0x00000039c4e0ab0c in pma_query_via () from /usr/lib64/libibmad.so.5
#3 0x0000000000427ed8 in read_ib_counter (ctx=0x0, ctrl=0x7fffa0dbf5f0) at components/infiniband/linux-infiniband.c:267
#4 host_read_values (ctx=0x0, ctrl=0x7fffa0dbf5f0) at components/infiniband/linux-infiniband.c:316
#5 INFINIBAND_start (ctx=0x0, ctrl=0x7fffa0dbf5f0) at components/infiniband/linux-infiniband.c:590
#6 0x00000000004092d2 in PAPI_start (EventSet=0) at papi.c:2169
#7 0x00000000004027c3 in main (argc=0, argv=0x7fffa0dbf5f0) at command_line.c:113


Should this work? Who owns the component, so that I can discuss with them what is going on?

What I tried to achieve with my patch was to allow only some of the ports to be initialized. So I introduced a new state, portdata->is_initialized == -1, to indicate "I tried that port, but can't use it".

Code: Select all
diff -rup ../papi-5.1.0/src/components/infiniband/linux-infiniband.c rapl_net_infiniband_mx/src/components/infiniband/linux-infiniband.c
--- ../papi-5.1.0/src/components/infiniband/linux-infiniband.c  2013-01-15 15:44:39.000000000 -0500
+++ rapl_net_infiniband_mx/src/components/infiniband/linux-infiniband.c 2013-03-08 06:08:15.504084587 -0500
@@ -1,3 +1,5 @@
+#define VERBOSE 1
+//#undef VERBOSE
 /****************************/
 /* THIS IS OPEN SOURCE CODE */
 /****************************/
@@ -184,6 +186,9 @@ addIBPort( const char *ca_name, umad_por
 static int
 init_ib_port( ib_port * portdata )
 {
+        ib_portid_t old_portid=portid;
+       int old_ibportnum=ibportnum;
+
        int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
                IB_PERFORMANCE_CLASS
        };
@@ -193,29 +198,38 @@ init_ib_port( ib_port * portdata )
 
        srcport = mad_rpc_open_port( ca, portdata->port_number, mgmt_classes, 4 );
        if ( !srcport ) {
-               fprintf( stderr, "Failed to open '%s' port '%d'\n", ca,
-                                portdata->port_number );
-               exit( 1 );
+               fprintf( stderr, "%s[%s](%d):: Failed to open '%s' port '%d'\n",
+                                 __FILE__, __FUNCTION__, __LINE__,
+                                 ca, portdata->port_number );
+               portdata->is_initialized = -1;
+               return 1;
        }
 
        if ( ib_resolve_self_via( &portid, &ibportnum, 0, srcport ) < 0 ) {
-               fprintf( stderr, "can't resolve self port\n" );
-               exit( 1 );
+               fprintf( stderr, "%s[%s](%d):: Can't resolve self port\n", __FILE__, __FUNCTION__, __LINE__ );
+               portdata->is_initialized = -1;
+               portid=old_portid;
+               ibportnum=old_ibportnum;
+               return 1;
        }
 
        /* PerfMgt ClassPortInfo is a required attribute */
        /* might be redundant, could be left out for fast implementation */
        if ( !pma_query_via
                 ( pc, &portid, ibportnum, ib_timeout, CLASS_PORT_INFO, srcport ) ) {
-               fprintf( stderr, "classportinfo query\n" );
-               exit( 1 );
+               fprintf( stderr, "%s[%s](%d):: classportinfo query failed\n", __FILE__, __FUNCTION__, __LINE__ );
+               portdata->is_initialized = -1;
+               portid=old_portid;
+               return 1;
        }
 
        if ( !performance_reset_via
                 ( pc, &portid, ibportnum, mask, ib_timeout, IB_GSI_PORT_COUNTERS,
                   srcport ) ) {
-               fprintf( stderr, "perf reset\n" );
-               exit( 1 );
+               fprintf( stderr, "%s[%s](%d)::performance_reset_via failed\n", __FILE__, __FUNCTION__, __LINE__);
+               portdata->is_initialized = -1;
+               portid=old_portid;
+               return 1;
        }
 
        /* read the initial values */
@@ -226,6 +240,7 @@ init_ib_port( ib_port * portdata )
 
        portdata->is_initialized = 1;
 
+       fprintf( stderr, "%s[%s](%d)::port %s is initialized \n", __FILE__, __FUNCTION__, __LINE__, ca);
        return 0;
 }
 
@@ -372,18 +387,23 @@ host_subscribe( const char *cntr )
                                        strncpy( tmp_name, cntr, len - 5 );
                                        tmp_name[len - 5] = 0;
                                        aktp = root_ib_port;
-                                       // printf("looking for IB port '%s'\n", tmp_name);
+#ifdef VERBOSE
+                                       fprintf(stderr, "%s:: looking for %s at IB port '%s'\n",
+                                               __FUNCTION__, cntr, tmp_name );
+#endif
                                        while ( aktp != NULL ) {
                                                if ( strcmp( aktp->name, tmp_name ) == 0 ) {
-                                                       if ( !aktp->is_initialized ) {
+                                                       if ( aktp->is_initialized == 0 ) {
                                                                init_ib_port( aktp );
+                                                       }
+                                                       if ( aktp->is_initialized > 0 ) {
                                                                active_ib_port = aktp;
+                                                               return loop + 1;
                                                        }
-                                                       return loop + 1;
                                                }
                                                /* name does not match, if this counter is
                                                   initialized, we can't have two active IB ports */
-                                               if ( aktp->is_initialized ) {
+                                               if ( aktp->is_initialized > 0) {
 #if 0  /* not necessary with OFED version >= 1.4 */
                                                        fprintf( stderr,
                                                                         "unable to activate IB port monitoring for more than one port\n" );
@@ -513,8 +533,12 @@ INFINIBAND_init_thread( hwd_context_t *
 
        counter_list = host_listCounter( num_counters );
 
-       for ( i = 0; i < counter_list->count; i++ )
+       for ( i = 0; i < counter_list->count; i++ ) {
+#ifdef VERBOSE
+               fprintf(stderr,"%s(%d):: Subscribing to counter list entry %3d %s\n",__FUNCTION__,__LINE__,i, counter_list->data[i] );
+#endif
                host_subscribe( counter_list->data[i] );
+       }
 
        ( ( INFINIBAND_context_t * ) ctx )->state.ncounter = counter_list->count;
 
@@ -529,7 +553,7 @@ INFINIBAND_init_thread( hwd_context_t *
  * PAPI process is initialized (IE PAPI_library_init)
  */
 int
-INFINIBAND_init_component(  )
+INFINIBAND_init_component( int cidx )
 {
        int i;
 
Only in rapl_net_infiniband_mx/src/components/lmsensors: config.log
diff -rup ../papi-5.1.0/src/components/mx/linux-mx.c rapl_net_infiniband_mx/src/components/mx/linux-mx.c
--- ../papi-5.1.0/src/components/mx/linux-mx.c  2013-01-15 15:44:39.000000000 -0500
+++ rapl_net_infiniband_mx/src/components/mx/linux-mx.c 2013-03-08 05:18:07.133219079 -0500
@@ -220,7 +220,7 @@ read_mx_counters( long long *counters )
  * PAPI process is initialized (IE PAPI_library_init)
  */
 int
-_mx_init_component(  )
+_mx_init_component(int cidx  )
 {
 
        FILE *fff;


Cheers,

/Nils
nsmeds
 
Posts: 4
Joined: Tue Mar 05, 2013 11:15 am

Return to Component PAPI (PAPI-C)

Who is online

Users browsing this forum: No registered users and 4 guests

cron