UncleBob psoted an article
about "Speed Of Java Cpp and Ruby" on his blog. I read the cpp code and
found a faster way to write it with the same Algorithm.
UncleBob's cpp code (can be
accessed from here:
http://www.butunclebob.com/ArticleS.UncleBob.SpeedOfJavaCppRuby) used a
bool type to save the information for each number. When the max number
increased, the bool array became huge(about 20MBytes for max number
5,000,000). Since the jump length kept increasing in loops, cache
missing will waste may time. We could use just one bit to save the
information for each number to reduce the memory usage and the cache
missing rate. And we can initial the array without setting even bits.
As the code pasted below:
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <arpa/inet.h>
double generate(int max) {
int i, n2, j2, m;
struct timeval start;
struct timezone tz;
gettimeofday(&start, &tz);
//char *sieve = new char[max];
m = max / 8;
char sieve[m];
memset((void *)sieve, 0x55, m);
sieve[0] = sieve[0] & (~0x1);
//sieve[0] = sieve[0] & (~0x2);
for (n2=3; n2<sqrt(max); n2+=1) {
int n = n2 - 1;
if (sieve[n/8] & (0x1 << (n%8))) {
for (j2=n2*n2; j2<max; j2+=n2) {
int j = j2 - 1;
if(sieve[j/8] & (0x1 << (j%8))) {
sieve[j / 8] = sieve[j / 8] ^ ((0x1 << (j%8)));
}
}
}
}
struct timeval end;
gettimeofday(&end, &tz);
double startSecond = start.tv_usec/1000000.0;
double endSecond = (end.tv_sec - start.tv_sec) + end.tv_usec/1000000.0;
/*for(i=2; i<100; i++) {
if(sieve[i/8] & (0x1 << (i%8))) {
printf("%d ", i + 1);
}
}
printf("\n");*/
return endSecond - startSecond;
}
int main(int ac, char** av) {
int i;
for (i=100000; i<=5000000; i+=100000) {
double time = generate(i);
printf("%f\n", (float)time);
//cout << time << endl;
}
}I tested these codes on my
lapton(IBM T60, 1G ram, dual core, 1.83GHz, 2M L2 cache, gcc-4.1, -O3
optimize). It takes about 0.078s when max number is 5,000,000. The
original code takes about 0.146s. So we get about 2 times faster now.
Because many cpu's register is
as the same size of unsigned long, proccess an unsigned long number
each time is better. So I wrote the second version:
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <arpa/inet.h>
#define LONG_LENGTH (sizeof(unsigned long) * 8)
double generate(int max) {
int i, n2, j2, m;
struct timeval start;
struct timezone tz;
gettimeofday(&start, &tz);
//char *sieve = new char[max];
m = max / LONG_LENGTH;
unsigned long sieve[m];
memset((void *)sieve, 0x55, m/8);
sieve[0] = sieve[0] & (~0x1);
//sieve[0] = sieve[0] & (~0x2);
for (n2=3; n2<sqrt(max); n2+=1) {
int n = n2 - 1;
if (sieve[n/LONG_LENGTH] & (0x1 << (n%LONG_LENGTH))) {
for (j2=n2*n2; j2<max; j2+=n2) {
int j = j2 - 1;
if(sieve[j/LONG_LENGTH] & (0x1 << (j%LONG_LENGTH))) {
sieve[j / LONG_LENGTH] = sieve[j / LONG_LENGTH] ^ ((0x1 <<
(j%LONG_LENGTH)));
}
}
}
}
struct timeval end;
gettimeofday(&end, &tz);
double startSecond = start.tv_usec/1000000.0;
double endSecond = (end.tv_sec - start.tv_sec) + end.tv_usec/1000000.0;
/*for(i=2; i<100; i++) {
if(sieve[i/LONG_LENGTH] & (0x1 << (i%LONG_LENGTH))) {
printf("%d ", i + 1);
}
}
printf("\n");*/
return endSecond - startSecond;
}
int main(int ac, char** av) {
int i;
for (i=100000; i<=5000000; i+=100000) {
double time = generate(i);
printf("%f\n", (float)time);
//cout << time << endl;
}
}Now it takes 0.040s in the
same situation. About 4 times faster than the initial version. There
were may optimize method about this algorithm, my partner ZhaoHai offer
one, with a more aggresive initial array, it takes 0.052s:
double generate(int max) {
int i, n2, j2, m;
struct timeval start;
struct timezone tz;
gettimeofday(&start, &tz);
//char *sieve = new char[max];
m = max / 8;
char sieve[m];
char init[3] = {0x50, 0x14, 0x45};
for ( i = 0; i < m / 3; i += 3) {
sieve[i] = 0x50;
sieve[i+1] = 0x14;
sieve[i+2] = 0x45;
}
j2 = m - m % 3;
for ( i = 0; i < m % 3; i++) {
sieve[j2 + i] = init[i];
}
//memset((void *)sieve, 0x55, m);
sieve[0] = sieve[0] & (~0x1);
//sieve[0] = sieve[0] & (~0x2);
for (n2=4; n2<sqrt(max); n2+=1) {
int n = n2 - 1;
if (sieve[n/8] & (0x1 << (n%8))) {
for (j2=n2*n2; j2<max; j2+=n2) {
int j = j2 - 1;
if(sieve[j/8] & (0x1 << (j%8))) {
sieve[j / 8] = sieve[j / 8] ^ ((0x1 << (j%8)));
}
}
}
}
struct timeval end;
gettimeofday(&end, &tz);
double startSecond = start.tv_usec/1000000.0;
double endSecond = (end.tv_sec - start.tv_sec) + end.tv_usec/1000000.0;
/*for(i=2; i<100; i++) {
if(sieve[i/8] & (0x1 << (i%8))) {
printf("%d ", i + 1);
}
}
printf("\n");*/
return endSecond - startSecond;
}
