From 1cfe8e2562e5e50769468382626ce36b734741c1 Mon Sep 17 00:00:00 2001
From: "Field G. Van Zee" <field@cs.utexas.edu>
Date: Thu, 5 Sep 2019 16:08:30 -0500
Subject: [PATCH] Reimplemented bli_cpuid_query() for ARM.
Details:
- Rewrote bli_cpuid_query() for ARM architectures to use stdio-based
functions such as fopen() and fgets() instead of popen(). The new code
does more or less the same thing as before--searches /proc/cpuinfo for
various strings, which are then parsed in order to determine the
model, part number, and features. Thanks to Dave Love for suggesting
this change in issue #335.
---
frame/base/bli_cpuid.c | 174 ++++++++++++++++++++++++++-----------------------
frame/base/bli_cpuid.h | 34 +++++-----
2 files changed, 109 insertions(+), 99 deletions(-)
diff --git a/frame/base/bli_cpuid.c b/frame/base/bli_cpuid.c
index f5c53fc..c8891f0 100644
--- a/frame/base/bli_cpuid.c
+++ b/frame/base/bli_cpuid.c
@@ -380,10 +380,12 @@ arch_t bli_cpuid_query_id( void )
// vendor.
vendor = bli_cpuid_query( &model, &part, &features );
- //printf( "vendor = %u\n", vendor );
- //printf( "model = %u\n", model );
- //printf( "part = 0x%x\n", part );
- //printf( "features = %u\n", features );
+#if 0
+ printf( "vendor = %u\n", vendor );
+ printf( "model = %u\n", model );
+ printf( "part = 0x%x\n", part );
+ printf( "features = %u\n", features );
+#endif
if ( vendor == VENDOR_ARM )
{
@@ -909,6 +911,8 @@ int vpu_count( void )
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
+#define TEMP_BUFFER_SIZE 200
+
uint32_t bli_cpuid_query
(
uint32_t* model,
@@ -919,96 +923,40 @@ uint32_t bli_cpuid_query
*model = MODEL_UNKNOWN;
*part = 0;
*features = 0;
-
-#if 1
- const char* grep_str1 = "grep -m 1 Processor /proc/cpuinfo";
- const char* grep_str2 = "grep -m 1 'CPU part' /proc/cpuinfo";
- const char* grep_str3 = "grep -m 1 Features /proc/cpuinfo";
-#else
- const char* grep_str1 = "grep -m 1 Processor ./proc_cpuinfo";
- const char* grep_str2 = "grep -m 1 'CPU part' ./proc_cpuinfo";
- const char* grep_str3 = "grep -m 1 Features ./proc_cpuinfo";
-#endif
- FILE *fd1 = popen( grep_str1, "r");
- if ( !fd1 )
- {
- //printf("popen 1 failed\n");
- return VENDOR_ARM;
- }
- FILE *fd2 = popen( grep_str2, "r");
- if (!fd2)
- {
- //printf("popen 2 failed\n");
- pclose(fd1);
- return VENDOR_ARM;
- }
- FILE *fd3 = popen( grep_str3, "r");
- if (!fd3)
- {
- //printf("popen 3 failed\n");
- pclose(fd1);
- pclose(fd2);
- return VENDOR_ARM;
- }
-
- uint32_t n1, n2, n3;
- int c;
-
- // First, discover how many chars are in each stream.
- for ( n1 = 0; (c = fgetc(fd1)) != EOF; ++n1 ) continue;
- for ( n2 = 0; (c = fgetc(fd2)) != EOF; ++n2 ) continue;
- for ( n3 = 0; (c = fgetc(fd3)) != EOF; ++n3 ) continue;
-
- //printf( "n1, n2, n3 = %u %u %u\n", n1, n2, n3 );
-
- // Close the streams.
- pclose( fd1 );
- pclose( fd2 );
- pclose( fd3 );
-
- // Allocate the correct amount of memory for each stream.
- char* proc_str = malloc( ( size_t )( n1 + 1 ) );
- char* ptno_str = malloc( ( size_t )( n2 + 1 ) );
- char* feat_str = malloc( ( size_t )( n3 + 1 ) );
- *proc_str = 0;
- *ptno_str = 0;
- *feat_str = 0;
-
- // Re-open the streams. Note that there is no need to check for errors
- // this time since we're assumign that the contents of /proc/cpuinfo
- // will be the same as before.
- fd1 = popen( grep_str1, "r");
- fd2 = popen( grep_str2, "r");
- fd3 = popen( grep_str3, "r");
+ char* pci_str = "/proc/cpuinfo";
+ char proc_str[ TEMP_BUFFER_SIZE ];
+ char ptno_str[ TEMP_BUFFER_SIZE ];
+ char feat_str[ TEMP_BUFFER_SIZE ];
char* r_val;
- // Now read each stream in its entirety. Nothing should go wrong, but
- // if it does, bail out.
- r_val = fgets( proc_str, n1, fd1 );
- if ( n1 && r_val == NULL ) bli_abort();
+ //printf( "bli_cpuid_query(): beginning search\n" );
- r_val = fgets( ptno_str, n2, fd2 );
- if ( n2 && r_val == NULL ) bli_abort();
+ // Search /proc/cpuinfo for the 'Processor' entry.
+ r_val = find_string_in( "Processor", proc_str, TEMP_BUFFER_SIZE, pci_str );
+ if ( r_val == NULL ) return VENDOR_ARM;
- r_val = fgets( feat_str, n3, fd3 );
- if ( n3 && r_val == NULL ) bli_abort();
+ // Search /proc/cpuinfo for the 'CPU part' entry.
+ r_val = find_string_in( "CPU part", ptno_str, TEMP_BUFFER_SIZE, pci_str );
+ if ( r_val == NULL ) return VENDOR_ARM;
- //printf( "proc_str: %s\n", proc_str );
- //printf( "ptno_str: %s\n", ptno_str );
- //printf( "feat_str: %s\n", feat_str );
+ // Search /proc/cpuinfo for the 'Features' entry.
+ r_val = find_string_in( "Features", feat_str, TEMP_BUFFER_SIZE, pci_str );
+ if ( r_val == NULL ) return VENDOR_ARM;
- // Close the streams.
- pclose( fd1 );
- pclose( fd2 );
- pclose( fd3 );
+#if 0
+ printf( "bli_cpuid_query(): full processor string: %s\n", proc_str );
+ printf( "bli_cpuid_query(): full part num string: %s\n", ptno_str );
+ printf( "bli_cpuid_query(): full features string: %s\n", feat_str );
+#endif
// Parse the feature string to check for SIMD features.
if ( strstr( feat_str, "neon" ) != NULL ||
strstr( feat_str, "asimd" ) != NULL )
*features |= FEATURE_NEON;
- //printf( "features var: %u\n", *features );
+
+ //printf( "bli_cpuid_query(): features var: %u\n", *features );
// Parse the processor string to uncover the model.
if ( strstr( proc_str, "ARMv7" ) != NULL )
@@ -1016,7 +964,8 @@ uint32_t bli_cpuid_query
else if ( strstr( proc_str, "AArch64" ) != NULL ||
strstr( proc_str, "ARMv8" ) )
*model = MODEL_ARMV8;
- //printf( "model: %u\n", *model );
+
+ //printf( "bli_cpuid_query(): model: %u\n", *model );
// Parse the part number string.
r_val = strstr( ptno_str, "0x" );
@@ -1024,9 +973,68 @@ uint32_t bli_cpuid_query
{
*part = strtol( r_val, NULL, 16 );
}
- //printf( "part#: %x\n", *part );
+ //printf( "bli_cpuid_query(): part#: %x\n", *part );
return VENDOR_ARM;
}
+char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath )
+{
+ // This function searches for the first line of the file located at
+ // 'filepath' that contains the string 'target' and then copies that
+ // line (actually, the substring of the line starting with 'target')
+ // to 'buffer', which is 'buf_len' bytes long.
+
+ char* r_val = NULL;
+
+ // Allocate a temporary local buffer equal to the size of buffer.
+ char* buf_local = malloc( buf_len * sizeof( char ) );
+
+ // Open the file stream.
+ FILE* stream = fopen( filepath, "r" );
+
+ // Repeatedly read in a line from the stream, storing the contents of
+ // the stream into buf_local.
+ while ( !feof( stream ) )
+ {
+ // Read in the current line, up to buf_len-1 bytes.
+ r_val = fgets( buf_local, buf_len-1, stream );
+
+ //printf( "read line: %s", buf_local );
+
+ // fgets() returns the pointer specified by the first argument (in
+ // this case, buf_local) on success and NULL on error.
+ if ( r_val == NULL ) break;
+
+ // Since fgets() was successful, we can search for the target string
+ // within the current line, as captured in buf_local.
+ r_val = strstr( buf_local, target );
+
+ // If the target string was found in buf_local, we save it to buffer.
+ if ( r_val != NULL )
+ {
+ //printf( " found match to '%s'\n", target );
+
+ // Copy the string read by fgets() to the caller's buffer.
+ strncpy( buffer, buf_local, buf_len );
+
+ // Make sure that we have a terminating null character by the
+ // end of the buffer.
+ if ( buf_len > 0 ) buffer[ buf_len - 1 ] = '\0';
+
+ // Leave the loop since we found the target string.
+ break;
+ }
+ }
+
+ // Close the file stream.
+ fclose( stream );
+
+ // Free the temporary local buffer.
+ free( buf_local );
+
+ // Return r_val so the caller knows if we failed.
+ return r_val;
+}
+
#endif
diff --git a/frame/base/bli_cpuid.h b/frame/base/bli_cpuid.h
index e609dcb..b6ecd3d 100644
--- a/frame/base/bli_cpuid.h
+++ b/frame/base/bli_cpuid.h
@@ -50,28 +50,28 @@
#ifndef BLIS_CPUID_H
#define BLIS_CPUID_H
-arch_t bli_cpuid_query_id( void );
+arch_t bli_cpuid_query_id( void );
// Intel
-bool_t bli_cpuid_is_skx( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_knl( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_haswell( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_skx( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_knl( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_haswell( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features );
// AMD
-bool_t bli_cpuid_is_zen( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_excavator( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features );
-bool_t bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_zen( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_excavator( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features );
+bool_t bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features );
// ARM
-bool_t bli_cpuid_is_thunderx2( uint32_t model, uint32_t part, uint32_t features );
-bool_t bli_cpuid_is_cortexa57( uint32_t model, uint32_t part, uint32_t features );
-bool_t bli_cpuid_is_cortexa53( uint32_t model, uint32_t part, uint32_t features );
-bool_t bli_cpuid_is_cortexa15( uint32_t model, uint32_t part, uint32_t features );
-bool_t bli_cpuid_is_cortexa9( uint32_t model, uint32_t part, uint32_t features );
+bool_t bli_cpuid_is_thunderx2( uint32_t model, uint32_t part, uint32_t features );
+bool_t bli_cpuid_is_cortexa57( uint32_t model, uint32_t part, uint32_t features );
+bool_t bli_cpuid_is_cortexa53( uint32_t model, uint32_t part, uint32_t features );
+bool_t bli_cpuid_is_cortexa15( uint32_t model, uint32_t part, uint32_t features );
+bool_t bli_cpuid_is_cortexa9( uint32_t model, uint32_t part, uint32_t features );
uint32_t bli_cpuid_query( uint32_t* family, uint32_t* model, uint32_t* features );
@@ -156,6 +156,8 @@ enum
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
+char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath );
+
enum
{
VENDOR_ARM = 0,
--
1.8.3.1