三个c语言测试性能的程序发现龙芯性能极差

第一个是求圆周率的程序。龙芯到目前为止无法算完999999位的圆周率,因为我们测试用的龙芯3a3000被搞坏了,目前只测了99999位的圆周率,源码如下:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Computes pi to a requested number of decimal digits with a three-term
 * arctangent formula, prints the digits, and reports the CPU time spent in
 * the computation (measured with clock()).
 *
 * argv[1] (optional): number of decimal digits to print. Parsed with atoi();
 *                     negative values are clamped to 0. Default: 1000.
 * argv[2] (optional): if present (any value), the formula labelled "Stomer"
 *                     is used instead of the one labelled "Gauss".
 *
 * Returns 0 on success, 1 if the term array cannot be allocated, 2 if the
 * accumulator array cannot be allocated.
 */
int main(int argc, char * argv[])
{
  clock_t start, finish;
  double  duration;
  long * pi, * t, m, n, r, s;
  /* Row 0 = Gauss, row 1 = Stomer. For each of the three terms:
     t0 = coefficient, k0 = sign (1 adds, 0 subtracts), n0 = N in arctg(1/N). */
  int t0[][3] = {{48, 32, 20}, {24, 8, 4}}, k0[][3] = {{1, 1, 0}, {1, 1, 1}};
  int n0[][3] = {{18, 57, 239}, {8, 57, 239}}, d, i, j, k, p, q;

  d = (argc > 1) ? (((i = atoi(argv[1])) < 0) ? 0 : i) : 1000;
  q = (argc > 2) ? 1 : 0;
  printf("%s\n\n", "Nature (R) Pi value compute Program  (C) Tue 1999.11.30");
  printf("pi= %s%d * arctg(1/%d) %s %d * arctg(1/%d) %s %d * arctg(1/%d) [%s]\n",
    k0[q][0] ? "" : "-", t0[q][0], n0[q][0], k0[q][1] ? "+" : "-", t0[q][1],
    n0[q][1], k0[q][2] ? "+" : "-", t0[q][2], n0[q][2], q ? "Stomer" : "Gauss");
  /* Five extra low-order guard digits are carried (d += 5) and not printed.
     Both arrays store one decimal digit per element; index d is the integer
     part and index 0 the least significant digit. */
  if ((t = (long *)calloc((d += 5) + 1, sizeof(long))) == NULL) return 1;
  if ((pi = (long *)calloc(d + 1, sizeof(long))) == NULL) {
    free(t);  /* do not leak the first array when the second allocation fails */
    return 2;
  }

  start = clock();

  for (i = d; i >= 0; i--) pi[i] = 0;
  for (p = 0; p < 3; p++) {
    /* Load the term's sign and divisor, set t to the integer coefficient
       (stored at the top index d) and clear every lower digit. */
    for (k=k0[q][p], n=n0[q][p], t[i=j=d]=t0[q][p], i--; i >= 0; i--) t[i] = 0;
    /* First series element: t = coefficient / n, added to or subtracted from
       pi digit by digit (long division from the most significant digit). */
    for (r = 0, i = j; i >= 0; i--) {
      r = (m = 10 * r + t[i]) % n;
      t[i] = m / n;
      k ? (pi[i] += t[i]) : (pi[i] -= t[i]);
    }
    /* j tracks the highest non-zero digit of t so later passes can skip
       leading zeros. */
    while (j > 0 && t[j] == 0) j--;
    /* Remaining elements: each pass divides t by n*n, then accumulates t / s
       with s = 3, 5, 7, ... and an alternating sign, until t has shrunk to
       nothing above index 0. */
    for (k = !k, s = 3, n *= n; j > 0; k = !k, s += 2) {
      for (r = 0, i = j; i >= 0; i--) {
        r = (m = 10 * r + t[i]) % n;
        t[i] = m / n;
      }
      while (j > 0 && t[j] == 0) j--;
      for (r = 0, i = j; i >= 0; i--) {
        r = (m = 10 * r + t[i]) % s;
        m /= s;
        k ? (pi[i] += m) : (pi[i] -= m);
      }
    }
  }
  /* Carry propagation from the least significant digit upwards; n is the
     running carry. A negative remainder is fixed up by borrowing 10 from the
     next digit so every pi[i] ends in 0..9. */
  for (n = i = 0; i <= d; pi[i++] = r) {
    n = (m = pi[i] + n) / 10;
    if ((r = m % 10) < 0) r += 10, n--;
  }
  finish = clock();
  printf("pi= %ld.", pi[d]);
  /* Print the fractional digits, skipping the 5 guard digits. The counter m
     starts at 6 for the first digit; a space is emitted when m is a multiple
     of 5 and a newline when m is a multiple of 65. */
  for (i = d - 1; i >= 5; i--)
    printf("%ld%s", pi[i], ((m = d - i + 5) % 65) ? ((m % 5) ? "" : " ") : "\n");
  printf("%sDIGITS: %d\n", (m % 65) ? "\n" : "", d - 5);
  duration = (double)(finish - start) / CLOCKS_PER_SEC;
  printf("time %f seconds \n", duration);
  free(pi);
  free(t);
  return 0;
}
 
已邀请:
这种大量循环的算法……
其实本身算法内对于不同的架构就有不同的影响。

admin

赞同来自: zzz19760225 神龙覆云

如果确实测出性能差,那么给出调优方案,才是真正的给力,一味地指责带不来进步:)

天高地厚

赞同来自:

第二段是一个求积分的过程,在测试过程中,如果不对库功能单独代替,龙芯的性能是极差极差的:
本代码中前部注释的地方,就是对库功能代替实现部分:
默认用gcc编译后,龙芯实在太垃圾了:
#include <stdio.h>
#include <stdlib.h> 
#include <time.h> 
#include <math.h>
// Function to be integrated
// Define and prototype it here
// | sin(x) |
#define INTEG_FUNC(x)  fabs(sin(x))
// Prototype timing function (declared here but not called by this program)
double dclock(void);
/*
 * Numerically integrates INTEG_FUNC over [0, 2*pi] with the composite
 * trapezoidal rule, for N = 2^2, 2^3, ..., 2^26 sub-intervals, printing one
 * table row (N and the computed integral) per N, followed by the total CPU
 * time of the whole sweep in whole seconds as measured by clock().
 *
 * Returns 0.
 */
int main(void)
{
   // Loop counters and number of sub-intervals
   unsigned int i, j, N;
   // Stepsize, independent variable x, and accumulated sum
   double step, x_i, sum;
   // Timing variables for evaluation
   clock_t start, finish;
   // Start integral from
   double interval_begin = 0.0;
   // Complete integral at
   double interval_end = 2.0 * 3.141592653589793238;
   // Start timing for the entire application
   printf("     \n");
   printf("    Number of    | Computed Integral | \n");
   printf(" Interior Points |                   | \n");
   start = clock();
   for (j=2;j<27;j++)
   {
    printf("------------------------------------- \n");
     // Number of sub-intervals for this pass
     N =  1u << j;
     // Width of each sub-interval
     step = (interval_end - interval_begin) / N;
     // Left endpoint contributes with half weight: f(x0) * [step/2]
     sum = INTEG_FUNC(interval_begin) * step / 2.0;
     // Trapezoidal rule: every interior sample point contributes
     // f(x_i) * step
     for (i=1;i<N;i++)
     {
        // Offset by interval_begin so the sample points stay correct if the
        // lower bound is ever changed from 0.0
        x_i = interval_begin + i * step;
        sum += INTEG_FUNC(x_i) * step;
     }
     // Right endpoint contributes with half weight: f(xN) * [step/2]
     sum += INTEG_FUNC(interval_end) * step / 2.0;
     // Column widths (17 and 19 characters) line up with the table header
     printf(" %10u      |  %14e   | \n", N, sum);
   }
   finish = clock();
   printf("     \n");
   // clock_t is not guaranteed to be int; convert explicitly to match %ld
   printf("   Application Clocks   = %ld  \n", (long)((finish - start)/CLOCKS_PER_SEC));
   printf("     \n");
   return 0;
}
 

天高地厚

赞同来自:

第三段是龙芯资深爱好者写的混合代码,这个程序我们打算在龙芯上跑时,龙芯让我们弄坏了,结果没跑过,只在x86上跑过,
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include   <math.h>

/*
 * Benchmark: sums the odd numbers 1..89999999 and then the even numbers
 * 2..90000000, calling sqrt() on the running sum and multiplying by the next
 * counter value on every iteration. Prints both sums, the last computed
 * product of each loop, and the CPU time of the whole run measured by clock().
 *
 * Returns 0.
 */
int main(){

  clock_t start, finish;
  double  duration;
  double  testa = 0.0;
  double  testb = 0.0;
    /* The sums reach about 2.0e15, far beyond the range of a 32-bit int, so
       they are kept in long long to avoid signed overflow. */
    long long  sum=0;
    int  num=1;
    long long  sum2=0;
    int  num2=2;
  start = clock();
    while(num<=90000000){
        sum=sum+num;
        num=num+2;
        testa=sqrt((double)sum);
        testa=testa*num;
    }
    printf("奇数和为:%lld\n",sum);
    printf("testa最终值 %.52lf\n", testa);
    while(num2<=90000000){
        sum2=sum2+num2;
        num2=num2+2;
        testb=sqrt((double)sum2);
        testb=testb*num2;
    }
  printf("偶数和为:%lld\n",sum2);
  printf("testb最终值 %.52lf\n", testb);
  finish = clock();
  duration = (double)(finish - start) / CLOCKS_PER_SEC;
  printf("time %f seconds \n", duration);

  return 0;
}

天高地厚

赞同来自:

在测试中,我们由于拿不到icc,在x86上用的是三个编译器版本,分别是gcc、vc2017和vc98
龙芯用的是北京人的提供的gcc
 

天高地厚

赞同来自:

求积分与圆周率也是吗

天高地厚

赞同来自:

我看是没有对比,就没有伤害吧!真是应了一句不怕不识货,就怕货比货!

神龙覆云 - 计算机专业学生

赞同来自:

是不是代码没有做优化? 我记得matlab的循环速度很慢但是改成向量运算就飞起来了

要回复问题请先登录注册