Monday, September 18, 2006

four times faster

UncleBob psoted an article
about "Speed Of Java Cpp and Ruby" on his blog. I read the cpp code and
found a faster way to write it with the same Algorithm.


UncleBob's cpp code (can be
accessed from here:
http://www.butunclebob.com/ArticleS.UncleBob.SpeedOfJavaCppRuby) used a
bool type to save the information for each number. When the max number
increased, the bool array became huge(about 20MBytes for max number
5,000,000). Since the jump length kept increasing in loops, cache
missing will waste may time. We could use just one bit to save the
information for each number to reduce the memory usage and the cache
missing rate. And we can initial the array without setting even bits.
As the code pasted below:

#include <stdio.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <arpa/inet.h>

double generate(int max) {
int i, n2, j2, m;
struct timeval start;
struct timezone tz;
gettimeofday(&start, &amp;tz);

//char *sieve = new char[max];
m = max / 8;
char sieve[m];

memset((void *)sieve, 0x55, m);
sieve[0] = sieve[0] & (~0x1);
//sieve[0] = sieve[0] & (~0x2);
for (n2=3; n2<sqrt(max); n2+=1) {
int n = n2 - 1;
if (sieve[n/8] & (0x1 << (n%8))) {
for (j2=n2*n2; j2<max; j2+=n2) {
int j = j2 - 1;
if(sieve[j/8] & (0x1 << (j%8))) {
sieve[j / 8] = sieve[j / 8] ^ ((0x1 << (j%8)));
}
}
}
}
struct timeval end;
gettimeofday(&end, &tz);
double startSecond = start.tv_usec/1000000.0;
double endSecond = (end.tv_sec - start.tv_sec) + end.tv_usec/1000000.0;
/*for(i=2; i<100; i++) {
if(sieve[i/8] & (0x1 << (i%8))) {
printf("%d ", i + 1);
}
}
printf("\n");*/
return endSecond - startSecond;
}

int main(int ac, char** av) {
int i;
for (i=100000; i<=5000000; i+=100000) {
double time = generate(i);
printf("%f\n", (float)time);
//cout << time << endl;
}
}

I tested these codes on my
lapton(IBM T60, 1G ram, dual core, 1.83GHz, 2M L2 cache, gcc-4.1, -O3
optimize). It takes about 0.078s when max number is 5,000,000. The
original code takes about 0.146s. So we get about 2 times faster now.

Because many cpu's register is
as the same size of unsigned long, proccess an unsigned long number
each time is better. So I wrote the second version:

#include <stdio.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>
#include <arpa/inet.h>

#define LONG_LENGTH (sizeof(unsigned long) * 8)

double generate(int max) {
int i, n2, j2, m;
struct timeval start;
struct timezone tz;

gettimeofday(&start, &amp;tz);

//char *sieve = new char[max];
m = max / LONG_LENGTH;
unsigned long sieve[m];

memset((void *)sieve, 0x55, m/8);
sieve[0] = sieve[0] & (~0x1);
//sieve[0] = sieve[0] & (~0x2);
for (n2=3; n2<sqrt(max); n2+=1) {
int n = n2 - 1;
if (sieve[n/LONG_LENGTH] & (0x1 << (n%LONG_LENGTH))) {
for (j2=n2*n2; j2<max; j2+=n2) {
int j = j2 - 1;
if(sieve[j/LONG_LENGTH] & (0x1 << (j%LONG_LENGTH))) {
sieve[j / LONG_LENGTH] = sieve[j / LONG_LENGTH] ^ ((0x1 <<
(j%LONG_LENGTH)));
}
}
}
}
struct timeval end;
gettimeofday(&end, &tz);
double startSecond = start.tv_usec/1000000.0;
double endSecond = (end.tv_sec - start.tv_sec) + end.tv_usec/1000000.0;
/*for(i=2; i<100; i++) {
if(sieve[i/LONG_LENGTH] & (0x1 << (i%LONG_LENGTH))) {
printf("%d ", i + 1);
}
}
printf("\n");*/
return endSecond - startSecond;
}

int main(int ac, char** av) {
int i;
for (i=100000; i<=5000000; i+=100000) {
double time = generate(i);
printf("%f\n", (float)time);
//cout << time << endl;
}
}

Now it takes 0.040s in the
same situation. About 4 times faster than the initial version. There
were may optimize method about this algorithm, my partner ZhaoHai offer
one, with a more aggresive initial array, it takes 0.052s:

double generate(int max) {
int i, n2, j2, m;
struct timeval start;
struct timezone tz;
gettimeofday(&start, &amp;tz);

//char *sieve = new char[max];
m = max / 8;
char sieve[m];
char init[3] = {0x50, 0x14, 0x45};
for ( i = 0; i < m / 3; i += 3) {
sieve[i] = 0x50;
sieve[i+1] = 0x14;
sieve[i+2] = 0x45;
}
j2 = m - m % 3;
for ( i = 0; i < m % 3; i++) {
sieve[j2 + i] = init[i];
}
//memset((void *)sieve, 0x55, m);
sieve[0] = sieve[0] & (~0x1);
//sieve[0] = sieve[0] & (~0x2);
for (n2=4; n2<sqrt(max); n2+=1) {
int n = n2 - 1;
if (sieve[n/8] & (0x1 << (n%8))) {
for (j2=n2*n2; j2<max; j2+=n2) {
int j = j2 - 1;
if(sieve[j/8] & (0x1 << (j%8))) {
sieve[j / 8] = sieve[j / 8] ^ ((0x1 << (j%8)));
}
}
}
}
struct timeval end;
gettimeofday(&end, &tz);
double startSecond = start.tv_usec/1000000.0;
double endSecond = (end.tv_sec - start.tv_sec) + end.tv_usec/1000000.0;
/*for(i=2; i<100; i++) {
if(sieve[i/8] & (0x1 << (i%8))) {
printf("%d ", i + 1);
}
}
printf("\n");*/
return endSecond - startSecond;
}

Saturday, September 09, 2006

第一次买时尚杂志

很偶然的 在楼下的7-11看到一本《时尚置业》 封面有一个酷哥抱着一个大头盔 身后是机场跑道 跑道上赫然停了一架F-100 很好奇什么样的疯子会买一架F-100飞着玩 看看它的服役纪录 -- F-100A 生产 203 架,坠毁 50 架;F-100C 生产 476 架,坠毁 85 架;生产数量最多的 1,274 架 F-100D 中有超过 500 架毁损于各种事故。飞这样的飞机的要么是疯子要么是高手,而且封面上的人还抱了一个大头盔,不是飞行员头盔,而是一个宇航员头盔... 这样的杂志,除了买下来我别无选择。
看了正文才知道,封面这哥们既不是疯子也不是高手,我觉得他甚至不会开飞机。他是一家公司--Virgin的老板,名叫布兰森。最近Virgin投资搞了一个Virgin银河公司,专门提供太空旅游项目,所以就摆了这么一个pose。说起太空旅游,我想到的人是鲁坦,世界上第一个实现中途不着陆,不空中加油的环球飞行的人,更酷的是,用来实现那次飞行的旅行者号飞机,是由鲁坦的弟弟(当然也姓鲁坦)设计,由无数的航空爱好者出钱出力,一点点建造起来的,中间完全没有任何国家和大公司的支助。二战以后,创纪录飞行器基本上都是由军方制造的,航空业已经如此的发达,没有国家的投资,光凭爱好者们自己已经折腾不出什么新玩意了,顶多也就是改造一下退役的螺旋桨飞机。而鲁坦打破了这一点,一架由爱好者们手工制造的飞机,创造了一个世界纪录。而太空飞船一号,是鲁坦的下一步,就是用自己制造的飞船把人送入太空。Virgin公司的太空游使用的飞船,怎么看都是鲁坦的作品,怪模怪样的喷气飞机,下面挂载着一枚火箭。不知道Virgin和鲁坦以及保罗艾伦(江湖传闻太空飞船一号是他投资的)是什么关系,但是显然鲁坦是里边最酷的人。布兰森即使抱着大头盔摆了很酷的pose,恐怕还是无法享受到亲手制造东西的快感,尤其是亲手制造很酷的东西 :-)

Friday, August 18, 2006