// GETURL.C -- Win32 console app -- rename gethttp, allow w/o http:// // Andrew Schulman, February 1997 // andrew@ora.com // cl geturl.c wsock32.lib // geturl [-head] [-split] [-input file] [-post data] // also handles URLs of form , converts to http://xxxx // TODO: // -- accept filename on cmdline to save to (e.g., for GIF) // -- after have filename, provide option to show in browser (via ShellExecute ddeexec) // -- doesn't work for ftp:// yet! // -- https:// doesn't really work, obviously! // -- support mailto: to show SMTP vs. MAPI? // -- make non-Windows version (no WSAStartup, winsock.h, etc.) // -- support Connection: Keep-Alive // -- add -links, -tag name,name,name options // -- improve -split option: tags on single line, no extra blank line // -- support :port, e.g., http://www.eb.com:195 #include #include #include #include #include #include "winsock.h" void fail(const char *s) { puts(s); exit(1); } #define msg(s) { printf("FAIL: %s\n", s); return 0; } char *nomem = "insufficient memory"; #define BUFFER_SIZE 20480 #define WINSOCK_VERSION 0x0101 #define NO_FLAGS 0 #define HTTP_PORT 80 #define HTTPS_PORT 443 #define FTP_PORT 21 // options int do_head = 0, do_manual_input = 0, do_post = 0, do_split = 0, do_base = 0; int do_loc = 1, do_verbose = 1; char *input_file, *post_data, *base; SOCKET ConnectWebServerSocket(char *host, int port) { static int did_startup = 0; static WSADATA wsaData; LPHOSTENT pHostEnt; SOCKADDR_IN sockAddr; struct in_addr *addr; u_char b[4]; SOCKET sock = INVALID_SOCKET; if (! did_startup) { if (WSAStartup(WINSOCK_VERSION, &wsaData)) msg("WSAStartup"); did_startup = 1; } sockAddr.sin_family = AF_INET; sockAddr.sin_port = htons(port); if (isdigit(*host)) { // already have IP address; don't need DNS lookup sscanf(host, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]); sockAddr.sin_addr.s_addr = *((u_long *) b); } else { if (!(pHostEnt = gethostbyname(host))) msg("gethostbyname"); sockAddr.sin_addr = *((LPIN_ADDR)*pHostEnt->h_addr_list); } // TODO:since gethostbyname is expensive? (DNS), see if same as last time? // TODO: SO_USELOOPBACK for http://localhost, "local CGI", etc. // TODO: maybe use SO_KEEPALIVE? if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) msg("socket"); if (do_verbose) { addr = (struct in_addr *) &sockAddr.sin_addr; memcpy(b, (u_char *) &addr->S_un.S_un_b, sizeof(unsigned long)); printf("Connecting to %u.%u.%u.%u\n", b[0], b[1], b[2], b[3]); } if (connect(sock, (LPSOCKADDR)&sockAddr, sizeof(sockAddr)) != 0) msg("connect"); return sock; } int SendWebQuery(SOCKET sock, char * szQuery) { char *request; if (! (request = malloc(2048))) msg(nomem); if (do_manual_input) { char line[80]; FILE *f = fopen(input_file, "r"); if (f == (FILE *) -1) msg("Can't open input file"); request[0] = 0; while (fgets(line, 80, f)) { if (strlen(request)+strlen(line) >= 2048) msg("Sorry, 2k limit"); strcat(request, line[0] ? line : "\n"); } fclose(f); } else if (do_post) { sprintf(request, "POST %s HTTP/1.0\n" "Content-type: application/x-www-form-urlencoded\n" "Content-length: %u\n\n" "%s\n", szQuery, strlen(post_data), post_data); } else { sprintf(request, "%s %s HTTP/1.0\n\n", (do_head ? "HEAD" : "GET"), szQuery); } if (send(sock, request, strlen(request), NO_FLAGS) == SOCKET_ERROR) msg("send"); free(request); return 0; } int get_url(char *url); static char *location_str = "Location:" ; static int location_len = 0; // should first check for "302 Redirection"!! // should only do inside header, before first "\n\n"!! int look_for_reloc(char *buf, int len) { char *s, *s2; if (! location_len) location_len = strlen(location_str); if (s = strstr(buf, location_str)) { char *url; if (! (url = malloc(512))) msg(nomem); if (s2 = strchr(s, '\r')) *s2 = '\0'; // seal off line if (s2 = strchr(s, '\n')) *s2 = '\0'; // seal off line strncpy(url, s, 512); puts(url); // if url doesn't contain "//" or "/" then is relative URL!! // e.g., www.truevalue.com -> index.cgi if (get_url(url)) { free(url); return 1; } else puts("--- Couldn't get redirected URL ---"); free(url); } return 0; } UINT RecvWebFile(SOCKET sock, char *buf) { int len; if ((len = recv(sock, buf, BUFFER_SIZE, NO_FLAGS)) == SOCKET_ERROR) msg("recv"); if (do_loc && look_for_reloc(buf, len)) return 0; return len; } int get_file(SOCKET sock, char *pathname); // confusing name, because also have -split option! int split(char *url, char *hostname, char *pathname) { char *buf, *h, *f, *s; int protocol = HTTP_PORT; if (! location_len) location_len = strlen(location_str); if (! (buf = malloc(2048))) msg(nomem); // can do redirection for now with geturl | grep Location: | geturl -stdin if (strncmp(url, location_str, location_len) == 0) url += location_len; // if quotes in URL, then yank out stuff within quotes // to handle lines gracefully if (strchr(url, '\"')) { strcpy(buf, url); url = buf; while (*url != '\"') url++; url++; if (s = strchr(url, '\"')) *s = '\0'; // block off any close quote } while (isspace(*url)) url++; if (do_base && (! strstr(url, "//"))) { static char *buf = (char *) 0; if (! buf) if (! (buf = malloc(2048))) msg(nomem); strcpy(buf, base); strcat(buf, url); url = buf; } if (strstr(url, "//")) { if (strncmp(url, "http://", 7) == 0) { url += 7; protocol = HTTP_PORT; } else if (strncmp(url, "https://", 8) == 0) { url += 8; protocol = HTTPS_PORT; } else if (strncmp(url, "ftp://", 6) == 0) { url += 6; protocol = FTP_PORT; } else { protocol = 0; msg("protocol"); } } h = url; f = url; while (*f && (*f != '/')) f++; strcpy(pathname, *f ? f : "/"); *f = 0; strcpy(hostname, h); free(buf); return protocol; } void display(char *buf, int recv) { static int in_tag = 0; // make static so carries over multiple buffers static int got_nl = 0; if (do_split) { char *s; int i; for (i=recv, s=buf; i--; s++) switch (*s) { case '\n' : case '\r' : if (in_tag) putchar(' '); else { if (! got_nl) putchar(*s); got_nl = 1; } break; case '<' : if (! got_nl) putchar('\n'); putchar('<'); in_tag++; break; case '>' : putchar('>'); putchar('\n'); got_nl = 1; in_tag--; break; default : putchar(*s); got_nl = 0; break; } } else fwrite(buf, recv, 1, stdout); } int get_file(SOCKET sock, char *pathname) { char *buf; UINT recv; char *s; if (! (buf = malloc(BUFFER_SIZE))) msg(nomem); if (SendWebQuery(sock, pathname) != 0) msg("SendWebQuery"); while (((recv = RecvWebFile(sock, buf)) != 0)) display(buf, recv); free(buf); putchar('\n'); return 1; } int get_url(char *url) { char hostname[256], pathname[512]; SOCKET sock; int protocol; if (! (protocol = split(url, hostname, pathname))) return 0; // printf("get_url: [%u,%s,%s]\n", protocol, hostname, pathname); if ((sock = ConnectWebServerSocket(hostname, protocol)) == INVALID_SOCKET) msg("ConnectWebServerSocket"); get_file(sock, pathname); closesocket(sock); return 1; } char *usage = "usage: geturl [options] \n" " options:\n" " -noloc : don't do HTTP relocations (default on)\n" " -base : use addr as base for all relative URLs\n" " -head : do HTTP HEAD (default GET)\n" " -post : do HTTP POST of data\n" " -input : get all HTTP headers from file\n" " -stdin : get URLs from stdin\n" " -split : break HTML output into lines on tags\n"; int main(int argc, char *argv[]) { int i; int do_stdin = 0; if (argc < 2) fail(usage); for (i=1; i