2010年12月20日 星期一

C 語言用 Winsock 擷取網頁內容

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <winsock.h>
#pragma comment(lib, "wsock32.lib")
// sockaddr_in is the Winsock struct describing a TCP/IP endpoint;
// connectTCP() fills it in and passes it to connect().
struct sockaddr_in serverAddress;
// hostent is the Winsock struct holding a host's IP address information;
// set from gethostbyname() in connectTCP().
struct hostent *hptr;
// WSADATA is the Winsock struct that receives sockets implementation details
// from WSAStartup().
WSADATA wsadata;
// SOCKET descriptor shared by the create/connect/send/receive steps.
SOCKET s;
// Size in bytes of the request buffer and of the receive buffer.
#define bufferSize 1024
// Host name to contact; assigned from argv[1] in main().
char *HostName;
// Path of the resource to request; assigned from argv[2] in main().
char *FilePath;
// Output file handle used by receiveData() when writing utf8.txt.
FILE *pUFile;

/*
 * Error handling: report a fatal Winsock error and terminate the program.
 *
 * Prints errorMessage followed by the code from WSAGetLastError(), releases
 * Winsock with WSACleanup(), and exits with EXIT_FAILURE. The function does
 * not return: every caller invokes it when the socket can no longer be used,
 * so carrying on would only operate on an invalid socket.
 *
 * errorMessage: short description of the operation that failed.
 */
void errorExit(char *errorMessage)
{
    /* Capture the code first so no later call can overwrite it. */
    int errorCode = WSAGetLastError();

    printf("錯誤訊息:%s,%d\n", errorMessage, errorCode);
    WSACleanup();
    /* exit() also flushes and closes any stdio stream that is still open. */
    exit(EXIT_FAILURE);
}

// 建立 Socket
void createSocket() 
{
if ( (s=socket(AF_INET, SOCK_STREAM, IPPROTO_TCP))<0) 
{
errorExit("Create Socket error");
}
}

//Connect
void connectTCP(char *HostName,char *FilePath ) 
{
//建立 hostent 結構
hptr=gethostbyname(HostName);

//建立 sockaddr_in 結構,變數為 serverAddress
memset(&serverAddress, 0, sizeof(serverAddress));
serverAddress.sin_family=AF_INET;
serverAddress.sin_port=htons(80);

//gethostbyname
memcpy(&serverAddress.sin_addr.s_addr,hptr->h_addr,hptr->h_length);

//建立 connect
printf("HostName:%s\n",HostName);
printf("FilePath:%s\n",FilePath);
printf("connect...\n");
if (connect(s, (struct sockaddr *) &serverAddress, sizeof(serverAddress)) < 0) 
{
errorExit("connect error");
}
}

/*
 * Build the HTTP GET request for the global FilePath/HostName and send it on
 * the global socket `s`, then shut down the sending side of the connection.
 *
 * Only the bytes of the formatted request are transmitted. Sending the whole
 * buffer (sizeof) would append the NUL terminator and uninitialised stack
 * bytes after the request, which the server would read as a second, garbage
 * request.
 *
 * The request asks for "Connection: close" and "Accept-Encoding: identity"
 * because this client reads until the server closes the connection and
 * stores the body as-is; it cannot decode gzip/deflate content.
 *
 * Failures are reported through errorExit().
 */
void sendData(void)
{
    char requestBuffer[bufferSize];
    int requestLength;
    int totalSent = 0;
    int sendResult;

    requestLength = snprintf(requestBuffer, sizeof(requestBuffer),
                             "GET %s HTTP/1.1\r\n"
                             "Host:%s\r\n"
                             "User-Agent:Mozilla/5.0 (Linux; X11)\r\n"
                             "Accept: */*\r\n"
                             "Accept-Language: zh-TW\r\n"
                             "Accept-Encoding: identity\r\n"
                             "Connection: close\r\n"
                             "\r\n",
                             FilePath, HostName);

    /* snprintf reports the untruncated length; refuse to send a cut-off request. */
    if (requestLength < 0 || requestLength >= (int)sizeof(requestBuffer))
    {
        errorExit("Request too long");
        return;
    }

    /* send() may accept fewer bytes than requested, so loop until all are sent. */
    while (totalSent < requestLength)
    {
        sendResult = send(s, requestBuffer + totalSent, requestLength - totalSent, 0);
        if (sendResult == SOCKET_ERROR)
        {
            errorExit("Send Data error");
            return;
        }
        totalSent += sendResult;
    }
    printf("送出位元組:%d\n", totalSent);

    /* 1 == SD_SEND: no more data will be sent; receiving stays possible. */
    if (shutdown(s, 1) == SOCKET_ERROR)
    {
        errorExit("Shutdown error");
    }
}

/*
 * Receive the response from the global socket `s` until the peer closes the
 * connection, appending every received byte to "utf8.txt".
 *
 * The file is opened once, on the first received chunk, in binary append
 * mode and written with fwrite(): the data is copied byte-for-byte, is not
 * cut at embedded NUL bytes, and does not get its line endings rewritten by
 * text mode. The received data is UTF-8, so it is written to a file rather
 * than printed to the console.
 *
 * A receive failure is reported through errorExit(); file errors are
 * reported with perror() and receiving continues so the byte total is still
 * printed.
 */
void receiveData(void)
{
    char receiveBuffer[bufferSize];
    int recvResult;
    int totalBytes = 0;
    int fileUsable = 1;   /* cleared after the first file error */

    pUFile = NULL;
    do
    {
        recvResult = recv(s, receiveBuffer, sizeof(receiveBuffer), 0);
        if (recvResult > 0)
        {
            totalBytes += recvResult;

            /* Open lazily so no file is created when nothing is received. */
            if (pUFile == NULL && fileUsable)
            {
                pUFile = fopen("utf8.txt", "ab");
                if (pUFile == NULL)
                {
                    perror("utf8.txt");
                    fileUsable = 0;
                }
            }

            if (pUFile != NULL &&
                fwrite(receiveBuffer, 1, (size_t)recvResult, pUFile) != (size_t)recvResult)
            {
                perror("utf8.txt");
                fclose(pUFile);
                pUFile = NULL;
                fileUsable = 0;
            }
        }
        else if (recvResult == 0)
        {
            printf("連接關閉\n");
        }
        else
        {
            errorExit("Receive error");
        }
    } while (recvResult > 0);

    if (pUFile != NULL)
    {
        if (fclose(pUFile) != 0)
        {
            perror("utf8.txt");
        }
        pUFile = NULL;
    }

    printf("總共接收位元組:%d\n", totalBytes);
}

/*
 * Entry point: fetch http://<argv[1]><argv[2]> and append the response to
 * utf8.txt.
 *
 * Example arguments:
 *   argv[1]  greeneyes.myweb.hinet.net
 *   argv[2]  /2009/08/2009080201.html
 *
 * Returns 0 on success and 1 when the arguments are wrong or Winsock cannot
 * be initialised.
 */
int main ( int argc, char *argv[] )
{
    int startupResult;

    if (argc != 3)
    {
        printf("引數錯誤\n");
        return 1;
    }

    HostName = argv[1];
    FilePath = argv[2];

    /* Winsock must be initialised before any other Winsock function is called. */
    startupResult = WSAStartup(MAKEWORD(2, 0), &wsadata);
    if (startupResult != 0)
    {
        /*
         * WSAStartup returns its error code directly; WSAGetLastError and
         * WSACleanup must not be used when initialisation failed.
         */
        printf("錯誤訊息:%s,%d\n", "WSAStartup error", startupResult);
        return 1;
    }

    createSocket();
    connectTCP(HostName, FilePath);
    sendData();
    receiveData();
    closesocket(s);
    WSACleanup();
    return 0;
}

沒有留言:

張貼留言