C言語で指定したURLのHTMLを取得するサンプルです。
Windows用です。ws2_32.lib をリンクして下さい。
GetHtmlByUrl関数に取得したいURLを指定して下さい（対応しているのは http:// で始まるURLのみで、https:// には対応していません）。
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Maximum number of connect() attempts ConnectHost makes before giving up. */
#define CONNECT_RETRY_COUNT 5
/* Base buffer size in bytes used by GetHtmlByHostPortPath; the response
   buffer starts at, and grows in steps of, BUF_LEN * 5. */
#define BUF_LEN 4096
/*
 * Sends an HTTP/1.1 GET request for `path` over the already-connected socket
 * `s` and reads the reply until the peer closes the connection.
 *
 *   s    : connected stream socket; it is neither shut down nor closed here.
 *   host : host name written into the Host header.
 *   port : port number written into the Host header.
 *   path : request path WITHOUT its leading '/' (one is added here);
 *          an empty string requests "/".
 *
 * Returns a malloc()ed, NUL-terminated buffer holding the response exactly as
 * received (status line and headers included; nothing is parsed or decoded).
 * The caller must free() it. Returns NULL on invalid arguments, allocation
 * failure or socket error.
 */
char *GetHtmlByHostPortPath( SOCKET s , char *host , unsigned short port , char *path ){
    char *pRequest = NULL;
    char *pContents = NULL;
    char *pGrown;
    size_t requestCap , total , capacity;
    int requestLen , offset , sent , readSize;
    int succeeded = 0;

    if( NULL == host || NULL == path ){
        goto Done;
    }

    /* Size the request buffer from the inputs so that a long path or host can
       never overflow it; 64 bytes covers the fixed text, the port digits and
       the terminator. */
    requestCap = strlen( host ) + strlen( path ) + 64;
    pRequest = (char *)malloc( requestCap );
    if( NULL == pRequest ){
        goto Done;
    }

    /* "Connection: close" asks the server to close the socket once the
       response is complete. The receive loop below only ends when recv()
       reports that close, so asking for keep-alive would stall it. */
    requestLen = sprintf_s( pRequest , requestCap ,
                            "GET /%s HTTP/1.1\r\n"
                            "Host: %s:%d\r\n"
                            "Connection: close\r\n"
                            "\r\n" ,
                            path , host , port );
    if( requestLen <= 0 ){
        goto Done;
    }

    /* send() may accept fewer bytes than requested, so loop until the whole
       request has been handed to the socket. */
    for( offset = 0 ; offset < requestLen ; offset += sent ){
        sent = send( s , pRequest + offset , requestLen - offset , 0 );
        if( SOCKET_ERROR == sent || 0 == sent ){
            goto Done;
        }
    }

    total = 0;
    capacity = BUF_LEN * 5;
    pContents = (char *)malloc( capacity );
    if( NULL == pContents ){
        goto Done;
    }

    for( ;; ){
        readSize = recv( s , pContents + total , (int)( capacity - total ) , 0 );
        if( SOCKET_ERROR == readSize ){
            goto Done;
        }
        if( 0 == readSize ){
            /* Peer closed the connection. total < capacity always holds here
               (the buffer is grown as soon as it fills), so the terminator
               fits. */
            pContents[total] = '\0';
            break;
        }
        total += (size_t)readSize;
        if( total == capacity ){
            /* Buffer full: grow by one more step, keeping the old block valid
               if realloc() fails so that it can still be freed below. */
            pGrown = (char *)realloc( pContents , capacity + BUF_LEN * 5 );
            if( NULL == pGrown ){
                goto Done;
            }
            pContents = pGrown;
            capacity += BUF_LEN * 5;
        }
    }
    succeeded = 1;

Done:
    free( pRequest );
    if( !succeeded ){
        free( pContents );
        pContents = NULL;
    }
    return pContents;
}
/*
 * Splits a URL of the form "http://host[:port][/path]" into its parts.
 *
 *   url   : URL to parse; it must start with exactly "http://". Parsing stops
 *           at the first whitespace character.
 *   pHost : receives the host name.
 *   pPort : receives the port; 80 when the URL has no port or the port is not
 *           a decimal number in the range 1..65535.
 *   pPath : receives the path WITHOUT its leading '/' ("" when the URL has no
 *           path).
 *
 * *pHost and *pPath point into the buffer that is returned, so they stay
 * valid only until the caller free()s that buffer.
 *
 * Returns the malloc()ed backing buffer on success. Returns NULL, leaving the
 * output parameters untouched, when an argument is NULL, the scheme is not
 * "http://", the host is empty, or memory cannot be allocated.
 */
char *GetHostPortPathByUrl( char *url , char **pHost , unsigned short *pPort , char **pPath ){
    static const char scheme[] = "http://";
    const size_t schemeLen = sizeof( scheme ) - 1;
    char *host_path = NULL;
    char *path , *slash , *colon , *end;
    unsigned short port = 80;
    size_t len;
    long value;

    if( NULL == url || NULL == pHost || NULL == pPort || NULL == pPath ){
        goto Done;
    }
    if( 0 != strncmp( url , scheme , schemeLen ) ){
        goto Done;
    }

    /* Copy everything after the scheme up to the first whitespace character. */
    len = strcspn( url + schemeLen , " \t\r\n\v\f" );
    if( 0 == len ){
        goto Done;
    }
    host_path = (char *)malloc( len + 1 );
    if( NULL == host_path ){
        goto Done;
    }
    memcpy( host_path , url + schemeLen , len );
    host_path[len] = '\0';

    /* Cut the path off first so that a ':' inside the path is never mistaken
       for the port separator. */
    slash = strchr( host_path , '/' );
    if( NULL == slash ){
        path = host_path + len;   /* the terminator itself: an empty string */
    }else{
        *slash = '\0';
        path = slash + 1;
    }

    colon = strchr( host_path , ':' );
    if( NULL != colon ){
        *colon = '\0';
        value = strtol( colon + 1 , &end , 10 );
        /* Accept only a complete, in-range number; otherwise keep port 80. */
        if( end != colon + 1 && '\0' == *end && value >= 1 && value <= 65535 ){
            port = (unsigned short)value;
        }
    }

    if( '\0' == host_path[0] ){
        /* e.g. "http:///index.html" or "http://:8080/" - there is no host. */
        free( host_path );
        host_path = NULL;
        goto Done;
    }

    *pHost = host_path;
    *pPort = port;
    *pPath = path;

Done:
    return host_path;
}
SOCKET ConnectHost( char *host , unsigned short port ){
char *pHostInfoBuf = NULL;
SOCKET s = INVALID_SOCKET;
DWORD dwConnectRetry;
struct hostent *serverent;
struct sockaddr_in serveraddr;
serverent = gethostbyname( host );
if( NULL == serverent ){
goto Done;
}
memset( &serveraddr , 0 , sizeof(serveraddr) );
serveraddr.sin_family = AF_INET;
memcpy( &serveraddr.sin_addr , *serverent->h_addr_list , serverent->h_length );
serveraddr.sin_port = htons( port );
s = socket( AF_INET , SOCK_STREAM , 0 );
if( INVALID_SOCKET == s ){
goto Done;
}
dwConnectRetry = CONNECT_RETRY_COUNT;
while( TRUE ){
if( SOCKET_ERROR != connect( s , (struct sockaddr *)&serveraddr , sizeof(serveraddr) ) ){
break;
}
dwConnectRetry--;
if( 0 == dwConnectRetry ){
closesocket( s );
s = INVALID_SOCKET;
goto Done;
}
Sleep( 1000 );
}
Done:
return s;
}
/*
 * Fetches `url` with an HTTP GET: parses the URL with GetHostPortPathByUrl,
 * connects with ConnectHost and reads the reply with GetHtmlByHostPortPath.
 *
 * Returns whatever GetHtmlByHostPortPath returns (a malloc()ed buffer that
 * the caller must free()), or NULL when the URL cannot be parsed or the
 * connection cannot be established. The socket and the parsed-URL buffer are
 * always released before returning.
 */
char *GetHtmlByUrl( char *url ){
    /* Must start out NULL: both early-failure paths jump straight to Done and
       return this value. */
    char *pContents = NULL;
    char *pHostInfoBuf = NULL , *host , *path;
    unsigned short port;
    SOCKET s = INVALID_SOCKET;

    pHostInfoBuf = GetHostPortPathByUrl( url , &host , &port , &path );
    if( NULL == pHostInfoBuf ){
        goto Done;
    }
    s = ConnectHost( host , port );
    if( INVALID_SOCKET == s ){
        goto Done;
    }
    pContents = GetHtmlByHostPortPath( s , host , port , path );

Done:
    if( INVALID_SOCKET != s ){
        closesocket( s );
    }
    /* host and path point into pHostInfoBuf, so it is freed only now. */
    free( pHostInfoBuf );
    return pContents;
}
/*
 * Sample entry point: initialises Winsock, fetches one fixed URL with
 * GetHtmlByUrl and prints the response to stdout.
 *
 * Returns 0 when the response was fetched and printed, 1 otherwise (a message
 * is written to stderr).
 */
int main(int argc, char *argv[]){
    /* GetHtmlByUrl only accepts "http://" URLs, so the sample URL has to use
       that scheme; an "https://" URL is always rejected by the parser. */
    char url[] = "http://pcvogel.sarakura.net/";
    WSADATA wsa;
    char *pContents;
    int exitCode = 1;

    (void)argc;
    (void)argv;

    if( 0 != WSAStartup( MAKEWORD(2, 0) , &wsa ) ){
        fprintf( stderr , "WSAStartup failed\n" );
        return 1;
    }

    pContents = GetHtmlByUrl( url );
    if( NULL != pContents ){
        printf( "%s" , pContents );
        free( pContents );
        exitCode = 0;
    }else{
        fprintf( stderr , "failed to fetch %s\n" , url );
    }

    /* Balance the successful WSAStartup call above. */
    WSACleanup();
    return exitCode;
}
コメント