我想用C/C++编写一个程序,它将动态地读取一个网页并从中提取信息.举个例子想象一下,如果你想写一个应用程序来关注并记录一个ebay拍卖.抓住网页有简单的方法吗?提供此功能的库?是否有一种简单的方法来解析页面以获取特定数据?
看看cURL库:
#include#include int main(void) { CURL *curl; CURLcode res; curl = curl_easy_init(); if(curl) { curl_easy_setopt(curl, CURLOPT_URL, "curl.haxx.se"); res = curl_easy_perform(curl); /* always cleanup */ curl_easy_cleanup(curl); } return 0; }
顺便说一下,如果不严格要求C++.我鼓励你尝试使用C#或Java.它更容易,并有一个内置的方式.
Windows代码:
#include#include #include #pragma comment(lib,"ws2_32.lib") using namespace std; int main (){ WSADATA wsaData; if (WSAStartup(MAKEWORD(2,2), &wsaData) != 0) { cout << "WSAStartup failed.\n"; system("pause"); return 1; } SOCKET Socket=socket(AF_INET,SOCK_STREAM,IPPROTO_TCP); struct hostent *host; host = gethostbyname("www.google.com"); SOCKADDR_IN SockAddr; SockAddr.sin_port=htons(80); SockAddr.sin_family=AF_INET; SockAddr.sin_addr.s_addr = *((unsigned long*)host->h_addr); cout << "Connecting...\n"; if(connect(Socket,(SOCKADDR*)(&SockAddr),sizeof(SockAddr)) != 0){ cout << "Could not connect"; system("pause"); return 1; } cout << "Connected.\n"; send(Socket,"GET / HTTP/1.1\r\nHost: www.google.com\r\nConnection: close\r\n\r\n", strlen("GET / HTTP/1.1\r\nHost: www.google.com\r\nConnection: close\r\n\r\n"),0); char buffer[10000]; int nDataLength; while ((nDataLength = recv(Socket,buffer,10000,0)) > 0){ int i = 0; while (buffer[i] >= 32 || buffer[i] == '\n' || buffer[i] == '\r') { cout << buffer[i]; i += 1; } } closesocket(Socket); WSACleanup(); system("pause"); return 0; }