指定したURLのHTML(HTTPレスポンス全体)をC言語で取得するサンプルです。
Windows用です。ws2_32.lib をリンクして下さい。
GetHtmlByUrl関数に取得したいURLを指定して下さい(http:// で始まるURLのみ対応しており、https:// には対応していません)。
/*
 * Minimal HTTP/1.1 client sample for Windows (Winsock).
 *
 * Fetches the raw HTTP response (status line, headers and body) for a plain
 * "http://" URL. TLS ("https://") is not supported.
 * Link with ws2_32.lib.
 */
#include <winsock2.h>   /* must precede <windows.h>, which would otherwise pull in winsock.h */
#include <ws2tcpip.h>   /* getaddrinfo / freeaddrinfo */
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CONNECT_RETRY_COUNT 5
#define BUF_LEN 4096
#define RECV_CHUNK_LEN ( BUF_LEN * 5 )  /* receive buffer grows in steps of this size */
#define DEFAULT_HTTP_PORT 80

/*
 * Sends exactly `len` bytes from `data` on socket `s`.
 * send() may transmit fewer bytes than requested, so loop until everything
 * has been written. Returns 0 on success, -1 if send() fails.
 */
static int SendAll( SOCKET s , const char *data , int len ){
    int sent = 0;

    while( sent < len ){
        int n = send( s , data + sent , len - sent , 0 );
        if( SOCKET_ERROR == n ){
            return -1;
        }
        sent += n;
    }
    return 0;
}

/*
 * Sends "GET /<path>" for <host>:<port> on the already connected socket `s`
 * and reads the response until the peer closes the connection.
 *
 * Returns a malloc'ed, NUL-terminated buffer holding the whole response
 * (headers included), or NULL on any failure. The caller must free() it.
 * The data is returned as a C string, so a response containing NUL bytes
 * will appear truncated to string functions.
 */
char *GetHtmlByHostPortPath( SOCKET s , char *host , unsigned short port , char *path ){
    char *pRequest = NULL;
    char *pContents = NULL;
    char *pGrown;
    size_t requestCap;
    size_t totalRead = 0;
    size_t capacity = 0;
    int requestLen;
    int readSize;
    int ok = 0;

    if( NULL == host || NULL == path ){
        goto Done;
    }

    /* The fixed text of the request is about 50 bytes; 80 leaves headroom
       for it, the port digits and the terminator. */
    requestCap = strlen( path ) + strlen( host ) + 80;
    pRequest = malloc( requestCap );
    if( NULL == pRequest ){
        goto Done;
    }

    /* "Connection: close" is required here: the receive loop below treats
       end-of-stream as end-of-response, and with keep-alive the server
       would hold the connection open until its idle timeout. */
    requestLen = sprintf_s( pRequest , requestCap ,
                            "GET /%s HTTP/1.1\r\n"
                            "Host: %s:%d\r\n"
                            "Connection: close\r\n"
                            "\r\n" ,
                            path , host , port );
    if( requestLen < 0 ){
        goto Done;
    }
    if( 0 != SendAll( s , pRequest , requestLen ) ){
        goto Done;
    }

    capacity = RECV_CHUNK_LEN;
    pContents = malloc( capacity );
    if( NULL == pContents ){
        goto Done;
    }

    for( ;; ){
        /* Free space never exceeds RECV_CHUNK_LEN, so the int cast is safe. */
        readSize = recv( s , pContents + totalRead , (int)( capacity - totalRead ) , 0 );
        if( SOCKET_ERROR == readSize ){
            goto Done;
        }
        if( 0 == readSize ){
            break;  /* peer closed the connection: response complete */
        }
        totalRead += (size_t)readSize;

        /* Grow as soon as the buffer is full so that at least one byte is
           always left for the terminating NUL. */
        if( totalRead == capacity ){
            pGrown = realloc( pContents , capacity + RECV_CHUNK_LEN );
            if( NULL == pGrown ){
                goto Done;
            }
            pContents = pGrown;
            capacity += RECV_CHUNK_LEN;
        }
    }
    pContents[ totalRead ] = '\0';
    ok = 1;

Done:
    free( pRequest );
    if( !ok ){
        free( pContents );
        pContents = NULL;
    }
    return pContents;
}

/*
 * Splits an "http://host[:port][/path]" URL into host, port and path.
 *
 * On success returns a malloc'ed buffer that backs both *pHost and *pPath
 * (the caller frees the returned pointer once, never *pHost / *pPath) and
 * fills in:
 *   *pHost  host name without the port
 *   *pPort  port number; 80 when absent or not in 1..65535
 *   *pPath  path without its leading '/', or an empty string when absent
 * Returns NULL when an argument is NULL, memory is exhausted, the URL does
 * not start with "http://", or the host part is empty. The outputs are not
 * meaningful in that case.
 */
char *GetHostPortPathByUrl( char *url , char **pHost , unsigned short *pPort , char **pPath ){
    char *host_path;
    char *slash;
    char *colon;
    size_t urlLen;
    long portValue;

    if( NULL == url || NULL == pHost || NULL == pPort || NULL == pPath ){
        return NULL;
    }

    urlLen = strlen( url );
    host_path = malloc( urlLen + 1 );
    if( NULL == host_path ){
        return NULL;
    }

    /* MSVC's sscanf_s takes the %s buffer size as `unsigned`, not size_t;
       passing a size_t through the varargs is a type mismatch on x64. */
    if( 1 != sscanf_s( url , "http://%s" , host_path , (unsigned)( urlLen + 1 ) ) ){
        free( host_path );
        return NULL;
    }

    /* Cut the path off first so that a ':' inside the path is never
       mistaken for the port separator. */
    slash = strchr( host_path , '/' );
    if( NULL == slash ){
        /* No path: point at the buffer's own terminator (an empty string). */
        *pPath = host_path + strlen( host_path );
    }else{
        *slash = '\0';
        *pPath = slash + 1;
    }

    *pHost = host_path;
    *pPort = DEFAULT_HTTP_PORT;
    colon = strchr( host_path , ':' );
    if( NULL != colon ){
        *colon = '\0';
        portValue = strtol( colon + 1 , NULL , 10 );
        if( portValue >= 1 && portValue <= 65535 ){
            *pPort = (unsigned short)portValue;
        }
    }

    if( '\0' == host_path[0] ){
        /* e.g. "http:///path" or "http://:8080/" */
        free( host_path );
        return NULL;
    }
    return host_path;
}

/*
 * Resolves `host` and opens a TCP connection to it on `port`.
 *
 * Every resolved address is tried in order; the whole list is retried up to
 * CONNECT_RETRY_COUNT times with a one second pause between rounds.
 * Returns the connected socket, or INVALID_SOCKET on failure. The caller
 * owns the socket and must closesocket() it.
 */
SOCKET ConnectHost( char *host , unsigned short port ){
    SOCKET s = INVALID_SOCKET;
    struct addrinfo hints;
    struct addrinfo *results = NULL;
    struct addrinfo *ai;
    char service[ 8 ];
    DWORD attempt;

    if( NULL == host ){
        return INVALID_SOCKET;
    }

    memset( &hints , 0 , sizeof(hints) );
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    /* getaddrinfo takes the port as a string. */
    sprintf_s( service , sizeof(service) , "%u" , (unsigned)port );
    if( 0 != getaddrinfo( host , service , &hints , &results ) ){
        return INVALID_SOCKET;
    }

    for( attempt = 0 ; attempt < CONNECT_RETRY_COUNT ; attempt++ ){
        if( attempt > 0 ){
            Sleep( 1000 );
        }
        for( ai = results ; NULL != ai ; ai = ai->ai_next ){
            s = socket( ai->ai_family , ai->ai_socktype , ai->ai_protocol );
            if( INVALID_SOCKET == s ){
                continue;
            }
            if( SOCKET_ERROR != connect( s , ai->ai_addr , (int)ai->ai_addrlen ) ){
                goto Done;  /* connected */
            }
            closesocket( s );
            s = INVALID_SOCKET;
        }
    }

Done:
    freeaddrinfo( results );
    return s;
}

/*
 * Fetches the HTTP response for `url` (must start with "http://").
 *
 * Returns a malloc'ed, NUL-terminated buffer containing the whole response
 * (headers included), or NULL on failure. The caller must free() it.
 * WSAStartup() must have been called beforehand.
 */
char *GetHtmlByUrl( char *url ){
    /* Must start as NULL: every early exit below returns this value. */
    char *pContents = NULL;
    char *pHostInfoBuf = NULL;
    char *host;
    char *path;
    unsigned short port;
    SOCKET s = INVALID_SOCKET;

    pHostInfoBuf = GetHostPortPathByUrl( url , &host , &port , &path );
    if( NULL == pHostInfoBuf ){
        goto Done;
    }

    s = ConnectHost( host , port );
    if( INVALID_SOCKET == s ){
        goto Done;
    }

    pContents = GetHtmlByHostPortPath( s , host , port , path );

Done:
    if( INVALID_SOCKET != s ){
        closesocket( s );
    }
    free( pHostInfoBuf );
    return pContents;
}

/*
 * Prints the HTTP response for the URL given as the first command-line
 * argument, or for the built-in sample URL when none is given.
 * Exits with 0 on success and 1 on failure.
 */
int main( int argc , char *argv[] ){
    WSADATA wsa;
    /* Plain http:// only: the URL parser rejects any other scheme and this
       client does not speak TLS. */
    char *url = "http://pcvogel.sarakura.net/";
    char *pContents;
    int exitCode = 1;

    if( argc > 1 ){
        url = argv[1];
    }

    if( 0 != WSAStartup( MAKEWORD( 2 , 2 ) , &wsa ) ){
        fprintf( stderr , "WSAStartup failed\n" );
        return 1;
    }

    pContents = GetHtmlByUrl( url );
    if( NULL != pContents ){
        printf( "%s" , pContents );
        free( pContents );
        exitCode = 0;
    }else{
        fprintf( stderr , "Failed to fetch %s\n" , url );
    }

    WSACleanup();
    return exitCode;
}
コメント