作者在 2010-07-02 01:07:46 发布以下内容
/**************************************************\
* Functions developed by zhong yunde. *
* filename : test_readfile.cpp *
* create time : 2008/05/08 *
* 功能:测试文件读取模式(用于程序优化) *
\**************************************************/
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>
/* Image dimensions in pixels (X and Y extents). */
#define XSIZE 2592
#define YSIZE 1944
/* Total number of pixels. Fully parenthesized so the macro behaves as a
 * single operand inside larger expressions such as `x / IMGSIZE`. */
#define IMGSIZE ((XSIZE) * (YSIZE))
/* One pixel: red, green and blue channels, each an unsigned char,
 * stored in that order. */
typedef struct RGB {
    unsigned char r, g, b;
} RGB;
struct RGB in[IMGSIZE];
unsigned char out[IMGSIZE];
// 彩色图象转黑白图象
void calc_lum1(void)
{
for(unsigned long i=0;i<IMGSIZE;i++)
{
double r,g,b,y;
r=in[i].r; g=in[i].g; b=in[i].b;
y=0.299*r+0.587*g+0.114*b;
out[i]=(unsigned char)y;
}
}
/* Colour image -> greyscale image, variant 2 (fixed point).
 * Uses integer weights 1224, 2404 and 467 and divides the weighted sum by
 * 4096 with a right shift of 12. All arithmetic is done on integers so the
 * routine really measures fixed-point cost; the earlier version routed every
 * operand through double, which produced the same bytes but was not a
 * fixed-point computation.
 *
 * NOTE(review): the weights sum to 4095, not 4096, so pure white maps to
 * 254. They are kept as-is so results stay consistent with table_init(). */
void calc_lum2(void)
{
    for (unsigned long i = 0; i < IMGSIZE; i++) {
        const unsigned long r = in[i].r;
        const unsigned long g = in[i].g;
        const unsigned long b = in[i].b;

        /* Largest possible sum is 255 * 4095, well inside unsigned long. */
        out[i] = (unsigned char)((r * 1224 + g * 2404 + b * 467) >> 12);
    }
}
/* Lookup tables filled by table_init(): the pre-scaled contribution of each
 * possible channel value (D is indexed with red, E with green, F with blue).
 * The `register` storage class was dropped: it is not permitted on a
 * file-scope declaration and makes this line fail to compile. */
unsigned char D[256], F[256], E[256];
/* Fill the three lookup tables: for every channel value 0..255 store the
 * value multiplied by its integer weight (1224, 2404, 467) and shifted
 * right by 12. Each entry is truncated independently. */
void table_init()
{
    short level = 0;

    while (level < 256) {
        D[level] = (level * 1224) >> 12;
        E[level] = (level * 2404) >> 12;
        F[level] = (level * 467) >> 12;
        ++level;
    }
}
void calc_lum3(void)
{
for(long i=0;i<IMGSIZE;i++)
{
unsigned char r,g,b;
r=D[in[i].r]; g=E[in[i].g]; b=F[in[i].b]; //查表
out[i] = r+g+b ;
}
}
/* Colour image -> greyscale image, variant 5: the same table-lookup loop as
 * calc_lum3(), but declared inline to compare timings.
 * Declared `static inline` because a plain `inline` definition in C99/C11
 * does not provide an external definition, so a call the compiler chooses
 * not to inline would fail to link.
 * table_init() must have been called first. */
static inline void calc_lum5(void)
{
    for (long i = 0; i < IMGSIZE; i++) {
        const unsigned char r = D[in[i].r];   /* table lookup */
        const unsigned char g = E[in[i].g];
        const unsigned char b = F[in[i].b];

        out[i] = r + g + b;
    }
}
/* Colour image -> greyscale image, variant 4: table lookup unrolled by two.
 * Each iteration handles two neighbouring pixels with independent sets of
 * locals, the intent being that the two computations can be issued to
 * separate ALUs.
 *
 * Fixes over the earlier version:
 *  - the first pixel's sum is no longer shifted right by 12 again (the
 *    table entries are already scaled, so the extra shift always gave 0);
 *  - the second pixel is stored to out[i + 1] instead of overwriting out[i];
 *  - integer arithmetic replaces the needless round trip through double;
 *  - a trailing pixel is handled if the pixel count is odd.
 * table_init() must have been called first. */
void calc_lum4(void)
{
    long i = 0;

    for (; i + 1 < IMGSIZE; i += 2) {
        /* ALU1 */
        const unsigned char r = D[in[i].r];
        const unsigned char g = E[in[i].g];
        const unsigned char b = F[in[i].b];
        /* ALU2 */
        const unsigned char r1 = D[in[i + 1].r];
        const unsigned char g1 = E[in[i + 1].g];
        const unsigned char b1 = F[in[i + 1].b];

        out[i] = (unsigned char)(r + g + b);
        out[i + 1] = (unsigned char)(r1 + g1 + b1);
    }

    /* Odd pixel count: convert the last remaining pixel on its own. */
    if (i < IMGSIZE) {
        out[i] = (unsigned char)(D[in[i].r] + E[in[i].g] + F[in[i].b]);
    }
}
/* Print one timing line: CPU seconds elapsed between two clock() samples,
 * followed by `terminator` (a newline sequence). */
static void report_elapsed(clock_t start_time, clock_t end_time, const char *terminator)
{
    printf("the Time of Transform=%6.5f(sec)%s",
           (double)(end_time - start_time) / CLOCKS_PER_SEC, terminator);
}

/* Read `nbytes` bytes from the current position of `fp` into a freshly
 * allocated buffer. Returns the buffer (the caller frees it), or NULL after
 * printing a diagnostic when allocation fails or the file is too short. */
static unsigned char *read_whole_image(FILE *fp, size_t nbytes)
{
    unsigned char *buf = (unsigned char *)malloc(nbytes);

    if (buf == NULL) {
        fprintf(stderr, "Failed to allocate %lu bytes !!\n", (unsigned long)nbytes);
        return NULL;
    }
    if (fread(buf, 1, nbytes, fp) != nbytes) {
        fprintf(stderr, "Failed to read %lu bytes of image data !!\n", (unsigned long)nbytes);
        free(buf);
        return NULL;
    }
    return buf;
}

/* Benchmark driver.
 * Part 1 times three ways of loading the interleaved R,G,B input file into
 * `in`; part 2 times the five greyscale conversions. The contents of `out`
 * after the last conversion (calc_lum4) are written to the output file.
 * Returns 0 on success and -1 on any I/O or allocation failure. */
int main()
{
    const char *filein = "Bretagneg.raw";
    const char *fileout = "Black.raw";
    const size_t npixels = (size_t)(IMGSIZE);
    const size_t nbytes = 3 * npixels;      /* three bytes per pixel in the file */
    clock_t start_time, end_time;
    FILE *IN = NULL, *OUT = NULL;
    unsigned char *image = NULL;
    int rc = -1;

    IN = fopen(filein, "rb");
    if (IN == NULL) {
        fprintf(stderr, "Failed to open %s for reading !!\n", filein);
        goto cleanup;
    }
    OUT = fopen(fileout, "wb");
    if (OUT == NULL) {
        fprintf(stderr, "Failed to open %s for writing !!\n", fileout);
        goto cleanup;
    }

    /* Load method 1: read the whole file at once, unpack through a moving pointer. */
    if (fseek(IN, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to rewind %s !!\n", filein);
        goto cleanup;
    }
    start_time = clock();
    image = read_whole_image(IN, nbytes);
    if (image == NULL) {
        goto cleanup;
    }
    {
        /* Walk a separate cursor so `image` still points at the allocation
         * when it is freed. */
        const unsigned char *cursor = image;

        for (size_t i = 0; i < npixels; i++) {
            in[i].r = *cursor++;
            in[i].g = *cursor++;
            in[i].b = *cursor++;
        }
    }
    free(image);
    image = NULL;
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Load method 2: read the whole file at once, unpack by array index. */
    if (fseek(IN, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to rewind %s !!\n", filein);
        goto cleanup;
    }
    start_time = clock();
    image = read_whole_image(IN, nbytes);
    if (image == NULL) {
        goto cleanup;
    }
    for (size_t i = 0; i < npixels; i++) {
        in[i].r = image[3 * i];
        in[i].g = image[3 * i + 1];
        in[i].b = image[3 * i + 2];
    }
    free(image);
    image = NULL;
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Load method 3: read one byte at a time with fgetc. */
    if (fseek(IN, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to rewind %s !!\n", filein);
        goto cleanup;
    }
    start_time = clock();
    for (size_t i = 0; i < npixels; i++) {
        in[i].r = (unsigned char)fgetc(IN);
        in[i].g = (unsigned char)fgetc(IN);
        in[i].b = (unsigned char)fgetc(IN);
    }
    end_time = clock();
    /* Checked once after the loop so the timed loop itself stays unchanged. */
    if (ferror(IN) || feof(IN)) {
        fprintf(stderr, "Failed to read %lu bytes of image data !!\n", (unsigned long)nbytes);
        goto cleanup;
    }
    report_elapsed(start_time, end_time, "\n\n");

    /* Conversion 1: floating-point formula applied directly. */
    start_time = clock();
    calc_lum1();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 2: fixed-point formula. */
    start_time = clock();
    calc_lum2();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 3: table lookup (tables are built outside the timed region). */
    table_init();
    start_time = clock();
    calc_lum3();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 5: table lookup declared inline. */
    start_time = clock();
    calc_lum5();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 4: two pixels per iteration; with only one ALU available
     * this form may end up slower rather than faster. */
    start_time = clock();
    calc_lum4();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    if (fwrite(out, 1, npixels, OUT) != npixels) {
        fprintf(stderr, "Failed to write %s !!\n", fileout);
        goto cleanup;
    }
    printf("\n completely \n");
    rc = 0;

cleanup:
    free(image);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (OUT != NULL && fclose(OUT) != 0) {
        fprintf(stderr, "Failed to close %s !!\n", fileout);
        rc = -1;
    }
    if (IN != NULL) {
        fclose(IN);
    }
    return rc;
}
//! 结论 :了解机器结构,编写和硬件对应的程序
/************************************************************
* 1 :文件读取建议整体读取,即使这样需要额外数据重排操作,
* 但是内存处理相对硬盘文件读写速度很快
* 2 :并非所有浮点操作转成定点操作,速度就能提高,如上由于PC机
* 带有硬件浮点处理器,因此浮点操作不一定比定点慢
*************************************************************/
/**************************************************\
* Functions developed by zhong yunde. *
* filename : test_readfile.cpp *
* create time : 2008/05/08 *
* 功能:测试文件读取模式(用于程序优化) *
\**************************************************/
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>
/* Image dimensions in pixels (X and Y extents). */
#define XSIZE 2592
#define YSIZE 1944
/* Total number of pixels. Fully parenthesized so the macro behaves as a
 * single operand inside larger expressions such as `x / IMGSIZE`. */
#define IMGSIZE ((XSIZE) * (YSIZE))
/* One pixel: red, green and blue channels, each an unsigned char,
 * stored in that order. */
typedef struct RGB {
    unsigned char r, g, b;
} RGB;
struct RGB in[IMGSIZE];
unsigned char out[IMGSIZE];
// 彩色图象转黑白图象
void calc_lum1(void)
{
for(unsigned long i=0;i<IMGSIZE;i++)
{
double r,g,b,y;
r=in[i].r; g=in[i].g; b=in[i].b;
y=0.299*r+0.587*g+0.114*b;
out[i]=(unsigned char)y;
}
}
/* Colour image -> greyscale image, variant 2 (fixed point).
 * Uses integer weights 1224, 2404 and 467 and divides the weighted sum by
 * 4096 with a right shift of 12. All arithmetic is done on integers so the
 * routine really measures fixed-point cost; the earlier version routed every
 * operand through double, which produced the same bytes but was not a
 * fixed-point computation.
 *
 * NOTE(review): the weights sum to 4095, not 4096, so pure white maps to
 * 254. They are kept as-is so results stay consistent with table_init(). */
void calc_lum2(void)
{
    for (unsigned long i = 0; i < IMGSIZE; i++) {
        const unsigned long r = in[i].r;
        const unsigned long g = in[i].g;
        const unsigned long b = in[i].b;

        /* Largest possible sum is 255 * 4095, well inside unsigned long. */
        out[i] = (unsigned char)((r * 1224 + g * 2404 + b * 467) >> 12);
    }
}
/* Lookup tables filled by table_init(): the pre-scaled contribution of each
 * possible channel value (D is indexed with red, E with green, F with blue).
 * The `register` storage class was dropped: it is not permitted on a
 * file-scope declaration and makes this line fail to compile. */
unsigned char D[256], F[256], E[256];
/* Fill the three lookup tables: for every channel value 0..255 store the
 * value multiplied by its integer weight (1224, 2404, 467) and shifted
 * right by 12. Each entry is truncated independently. */
void table_init()
{
    short level = 0;

    while (level < 256) {
        D[level] = (level * 1224) >> 12;
        E[level] = (level * 2404) >> 12;
        F[level] = (level * 467) >> 12;
        ++level;
    }
}
void calc_lum3(void)
{
for(long i=0;i<IMGSIZE;i++)
{
unsigned char r,g,b;
r=D[in[i].r]; g=E[in[i].g]; b=F[in[i].b]; //查表
out[i] = r+g+b ;
}
}
/* Colour image -> greyscale image, variant 5: the same table-lookup loop as
 * calc_lum3(), but declared inline to compare timings.
 * Declared `static inline` because a plain `inline` definition in C99/C11
 * does not provide an external definition, so a call the compiler chooses
 * not to inline would fail to link.
 * table_init() must have been called first. */
static inline void calc_lum5(void)
{
    for (long i = 0; i < IMGSIZE; i++) {
        const unsigned char r = D[in[i].r];   /* table lookup */
        const unsigned char g = E[in[i].g];
        const unsigned char b = F[in[i].b];

        out[i] = r + g + b;
    }
}
/* Colour image -> greyscale image, variant 4: table lookup unrolled by two.
 * Each iteration handles two neighbouring pixels with independent sets of
 * locals, the intent being that the two computations can be issued to
 * separate ALUs.
 *
 * Fixes over the earlier version:
 *  - the first pixel's sum is no longer shifted right by 12 again (the
 *    table entries are already scaled, so the extra shift always gave 0);
 *  - the second pixel is stored to out[i + 1] instead of overwriting out[i];
 *  - integer arithmetic replaces the needless round trip through double;
 *  - a trailing pixel is handled if the pixel count is odd.
 * table_init() must have been called first. */
void calc_lum4(void)
{
    long i = 0;

    for (; i + 1 < IMGSIZE; i += 2) {
        /* ALU1 */
        const unsigned char r = D[in[i].r];
        const unsigned char g = E[in[i].g];
        const unsigned char b = F[in[i].b];
        /* ALU2 */
        const unsigned char r1 = D[in[i + 1].r];
        const unsigned char g1 = E[in[i + 1].g];
        const unsigned char b1 = F[in[i + 1].b];

        out[i] = (unsigned char)(r + g + b);
        out[i + 1] = (unsigned char)(r1 + g1 + b1);
    }

    /* Odd pixel count: convert the last remaining pixel on its own. */
    if (i < IMGSIZE) {
        out[i] = (unsigned char)(D[in[i].r] + E[in[i].g] + F[in[i].b]);
    }
}
/* Print one timing line: CPU seconds elapsed between two clock() samples,
 * followed by `terminator` (a newline sequence). */
static void report_elapsed(clock_t start_time, clock_t end_time, const char *terminator)
{
    printf("the Time of Transform=%6.5f(sec)%s",
           (double)(end_time - start_time) / CLOCKS_PER_SEC, terminator);
}

/* Read `nbytes` bytes from the current position of `fp` into a freshly
 * allocated buffer. Returns the buffer (the caller frees it), or NULL after
 * printing a diagnostic when allocation fails or the file is too short. */
static unsigned char *read_whole_image(FILE *fp, size_t nbytes)
{
    unsigned char *buf = (unsigned char *)malloc(nbytes);

    if (buf == NULL) {
        fprintf(stderr, "Failed to allocate %lu bytes !!\n", (unsigned long)nbytes);
        return NULL;
    }
    if (fread(buf, 1, nbytes, fp) != nbytes) {
        fprintf(stderr, "Failed to read %lu bytes of image data !!\n", (unsigned long)nbytes);
        free(buf);
        return NULL;
    }
    return buf;
}

/* Benchmark driver.
 * Part 1 times three ways of loading the interleaved R,G,B input file into
 * `in`; part 2 times the five greyscale conversions. The contents of `out`
 * after the last conversion (calc_lum4) are written to the output file.
 * Returns 0 on success and -1 on any I/O or allocation failure. */
int main()
{
    const char *filein = "Bretagneg.raw";
    const char *fileout = "Black.raw";
    const size_t npixels = (size_t)(IMGSIZE);
    const size_t nbytes = 3 * npixels;      /* three bytes per pixel in the file */
    clock_t start_time, end_time;
    FILE *IN = NULL, *OUT = NULL;
    unsigned char *image = NULL;
    int rc = -1;

    IN = fopen(filein, "rb");
    if (IN == NULL) {
        fprintf(stderr, "Failed to open %s for reading !!\n", filein);
        goto cleanup;
    }
    OUT = fopen(fileout, "wb");
    if (OUT == NULL) {
        fprintf(stderr, "Failed to open %s for writing !!\n", fileout);
        goto cleanup;
    }

    /* Load method 1: read the whole file at once, unpack through a moving pointer. */
    if (fseek(IN, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to rewind %s !!\n", filein);
        goto cleanup;
    }
    start_time = clock();
    image = read_whole_image(IN, nbytes);
    if (image == NULL) {
        goto cleanup;
    }
    {
        /* Walk a separate cursor so `image` still points at the allocation
         * when it is freed. */
        const unsigned char *cursor = image;

        for (size_t i = 0; i < npixels; i++) {
            in[i].r = *cursor++;
            in[i].g = *cursor++;
            in[i].b = *cursor++;
        }
    }
    free(image);
    image = NULL;
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Load method 2: read the whole file at once, unpack by array index. */
    if (fseek(IN, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to rewind %s !!\n", filein);
        goto cleanup;
    }
    start_time = clock();
    image = read_whole_image(IN, nbytes);
    if (image == NULL) {
        goto cleanup;
    }
    for (size_t i = 0; i < npixels; i++) {
        in[i].r = image[3 * i];
        in[i].g = image[3 * i + 1];
        in[i].b = image[3 * i + 2];
    }
    free(image);
    image = NULL;
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Load method 3: read one byte at a time with fgetc. */
    if (fseek(IN, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to rewind %s !!\n", filein);
        goto cleanup;
    }
    start_time = clock();
    for (size_t i = 0; i < npixels; i++) {
        in[i].r = (unsigned char)fgetc(IN);
        in[i].g = (unsigned char)fgetc(IN);
        in[i].b = (unsigned char)fgetc(IN);
    }
    end_time = clock();
    /* Checked once after the loop so the timed loop itself stays unchanged. */
    if (ferror(IN) || feof(IN)) {
        fprintf(stderr, "Failed to read %lu bytes of image data !!\n", (unsigned long)nbytes);
        goto cleanup;
    }
    report_elapsed(start_time, end_time, "\n\n");

    /* Conversion 1: floating-point formula applied directly. */
    start_time = clock();
    calc_lum1();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 2: fixed-point formula. */
    start_time = clock();
    calc_lum2();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 3: table lookup (tables are built outside the timed region). */
    table_init();
    start_time = clock();
    calc_lum3();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 5: table lookup declared inline. */
    start_time = clock();
    calc_lum5();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    /* Conversion 4: two pixels per iteration; with only one ALU available
     * this form may end up slower rather than faster. */
    start_time = clock();
    calc_lum4();
    end_time = clock();
    report_elapsed(start_time, end_time, "\n");

    if (fwrite(out, 1, npixels, OUT) != npixels) {
        fprintf(stderr, "Failed to write %s !!\n", fileout);
        goto cleanup;
    }
    printf("\n completely \n");
    rc = 0;

cleanup:
    free(image);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (OUT != NULL && fclose(OUT) != 0) {
        fprintf(stderr, "Failed to close %s !!\n", fileout);
        rc = -1;
    }
    if (IN != NULL) {
        fclose(IN);
    }
    return rc;
}
//! 结论 :了解机器结构,编写和硬件对应的程序
/************************************************************
* 1 :文件读取建议整体读取,即使这样需要额外数据重排操作,
* 但是内存处理相对硬盘文件读写速度很快
* 2 :并非所有浮点操作转成定点操作,速度就能提高,如上由于PC机
* 带有硬件浮点处理器,因此浮点操作不一定比定点慢
*************************************************************/