/**************************************************************************** * NCSA Mosaic for the X Window System * * Software Development Group * * National Center for Supercomputing Applications * * University of Illinois at Urbana-Champaign * * 605 E. Springfield, Champaign IL 61820 * * mosaic@ncsa.uiuc.edu * * * * Copyright (C) 1993, Board of Trustees of the University of Illinois * * * * NCSA Mosaic software, both binary and source (hereafter, Software) is * * copyrighted by The Board of Trustees of the University of Illinois * * (UI), and ownership remains with the UI. * * * * The UI grants you (hereafter, Licensee) a license to use the Software * * for academic, research and internal business purposes only, without a * * fee. Licensee may distribute the binary and source code (if released) * * to third parties provided that the copyright notice and this statement * * appears on all copies and that no charge is associated with such * * copies. * * * * Licensee may make derivative works. However, if Licensee distributes * * any derivative work based on or derived from the Software, then * * Licensee will (1) notify NCSA regarding its distribution of the * * derivative work, and (2) clearly notify users that such derivative * * work is a modified version and not the original NCSA Mosaic * * distributed by the UI. * * * * Any Licensee wishing to make commercial use of the Software should * * contact the UI, c/o NCSA, to negotiate an appropriate license for such * * commercial use. Commercial use includes (1) integration of all or * * part of the source code into a product for sale or license by or on * * behalf of Licensee to third parties, or (2) distribution of the binary * * code or source code to third parties that need it to utilize a * * commercial product sold or licensed by or on behalf of Licensee. * * * * UI MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THIS SOFTWARE FOR * * ANY PURPOSE. 
IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED * * WARRANTY. THE UI SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY THE * * USERS OF THIS SOFTWARE. * * * * By using or copying this Software, Licensee agrees to abide by the * * copyright law and all other applicable laws of the U.S. including, but * * not limited to, export control laws, and the terms of this license. * * UI shall have the right to terminate this license immediately by * * written notice upon Licensee's breach of, or non-compliance with, any * * of its terms. Licensee may be held legally responsible for any * * copyright infringement that is caused or encouraged by Licensee's * * failure to abide by the terms of this license. * * * * Comments and questions are welcome and can be sent to * * mosaic-x@ncsa.uiuc.edu. * ****************************************************************************/ #include "../config.h" #ifndef VMS #include struct timeval Tv; struct timezone Tz; #else #include #endif #include #include #ifndef sun /* To get atoi. */ #include #endif #include "HTML.h" #include "HTMLamp.h" extern void FreeObjList(); extern struct mark_up *AddObj(); int NoBodyColors(Widget w); int NoBodyImages(Widget w); char *ParseMarkTag(); extern int tableSupportEnabled; #ifndef DISABLE_TRACE extern int htmlwTrace; #endif #ifdef NOT_ASCII #define TOLOWER(x) (tolower(x)) #else /* * A hack to speed up caseless_equal. 
/*
 * TOLOWER(x) folds one character to lower case for the caseless string
 * comparisons below.  The ASCII build uses a 256-entry lookup table (a hack
 * to speed up caseless_equal; thanks to Quincey Koziol for developing it):
 * 'A'..'Z' map to 'a'..'z' and every other value maps to itself.
 *
 * The argument is converted to unsigned char before use, so a plain char
 * holding a value above 0x7f can never become a negative table index (or a
 * negative argument to tolower()).
 */
#ifdef NOT_ASCII
#define TOLOWER(x) (tolower((unsigned char)(x)))
#else
unsigned char map_table[256] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
#define TOLOWER(x) (map_table[(unsigned char)(x)])
#endif /* NOT_ASCII */

/*
 * Check if two strings are equal, ignoring case.
 * The strings must be of the same length to be equal.
 *
 * str1, str2   NUL-terminated strings; either may be NULL.
 *
 * return 1 if equal, 0 otherwise (including when either pointer is NULL).
 */
int
caseless_equal(char *str1, char *str2)
{
    if ((str1 == NULL) || (str2 == NULL)) {
        return(0);
    }

    while ((*str1 != '\0') && (*str2 != '\0')) {
        if (TOLOWER(*str1) != TOLOWER(*str2)) {
            return(0);
        }
        str1++;
        str2++;
    }

    /* Equal only if both strings ended at the same position. */
    return(((*str1 == '\0') && (*str2 == '\0')) ? 1 : 0);
}

/*
 * Check if two strings are equal in the first cnt characters, ignoring case.
 * The strings must both be at least of length cnt to be equal.
 *
 * str1, str2   NUL-terminated strings; either may be NULL.
 * cnt          number of leading characters to compare.
 *
 * return 1 if equal, 0 otherwise.  A NULL pointer yields 0; otherwise a
 * cnt below 1 yields 1 because there is nothing to compare.
 */
int
caseless_equal_prefix(char *str1, char *str2, int cnt)
{
    int i;

    if ((str1 == NULL) || (str2 == NULL)) {
        return(0);
    }
    if (cnt < 1) {
        return(1);
    }

    for (i = 0; i < cnt; i++) {
        /*
         * A string that ends before cnt characters cannot match.  Stopping
         * here also keeps the loop from reading past the terminating NUL
         * when both strings are equal but shorter than cnt.
         */
        if ((str1[i] == '\0') || (str2[i] == '\0')) {
            return(0);
        }
        if (TOLOWER(str1[i]) != TOLOWER(str2[i])) {
            return(0);
        }
    }
    return(1);
}

/*
 * Clean up the white space in a string.
 * Remove all leading and trailing whitespace, and turn every run of
 * internal whitespace into a single space separating words.
 * The cleaning is done in place in the passed txt buffer, which must be
 * writable.  The resultant string can be shorter, it can never get longer.
 * A NULL txt is ignored.
 */
void
clean_white_space(char *txt)
{
    char *src;      /* next character to examine */
    char *dst;      /* next position to write */

    if (txt == NULL) {
        return;
    }

    src = txt;
    dst = txt;

    /*
     * Remove leading white space.  isspace() is only defined for values
     * representable as unsigned char (or EOF), hence the casts.
     */
    while (isspace((unsigned char)*src)) {
        src++;
    }

    while (*src != '\0') {
        /* Move one word down to the write position. */
        while ((*src != '\0') && (!isspace((unsigned char)*src))) {
            *dst++ = *src++;
        }

        /* Skip the whitespace run that follows the word. */
        while (isspace((unsigned char)*src)) {
            src++;
        }

        /* Only separate words; trailing whitespace is dropped. */
        if (*src != '\0') {
            *dst++ = ' ';
        }
    }
    *dst = '\0';
}

/*
 * parse an ampersand escape, and return the appropriate character, or
 * '\0' on error.
 * we should really only use caseless_equal_prefix for unterminated, and use
 * caseless_equal otherwise, but since there are so many escapes, and I
 * don't want to type everything twice, I always use caseless_equal_prefix
 * Turns out the escapes are case sensitive, use strncmp.
 */
* termination states: * 0: terminated with a ';' * 1: unterminated * 2: terminated with whitespace */ char ExpandEscapes(esc, endp, termination) char *esc; char **endp; int termination; { int cnt; char val; int unterminated; unterminated = (termination & 0x01); esc++; if (*esc == '#') { if (unterminated) { char *tptr; char tchar; tptr = (char *)(esc + 1); while (isdigit((int)*tptr)) { tptr++; } tchar = *tptr; *tptr = '\0'; val = (char)atoi((esc + 1)); *tptr = tchar; *endp = tptr; } else { val = (char)atoi((esc + 1)); *endp = (char *)(esc + strlen(esc)); } } else { int escLen, ampLen; cnt = 0; escLen = strlen(esc); while (AmpEscapes[cnt].tag != NULL) { ampLen = strlen(AmpEscapes[cnt].tag); if ((escLen == ampLen) && (strncmp(esc, AmpEscapes[cnt].tag, ampLen) == 0)) { val = AmpEscapes[cnt].value; *endp = (char *)(esc + strlen(AmpEscapes[cnt].tag)); break; } cnt++; } if (AmpEscapes[cnt].tag == NULL) { #ifndef DISABLE_TRACE if (htmlwTrace) { fprintf(stderr, "Error bad & string\n"); } #endif val = '\0'; *endp = (char *)NULL; } } return(val); } /* * Clean the special HTML character escapes out of the text and replace * them with the appropriate characters "<" = "<", ">" = ">", * "&" = "&" * GAG: apperantly < etc. can be left unterminated, what a nightmare. * Ok, better, they have to be terminated with white-space or ';'. * the '&' character must be immediately followed by a letter to be * a valid escape sequence. Other &'s are left alone. * The cleaning is done by rearranging chars in the passed txt buffer. * if any escapes are replaced, the string becomes shorter. */ void clean_text(txt) char *txt; { int unterminated; int space_terminated; char *ptr; char *ptr2; char *start; char *text; char *tend; char tchar; char val; if (txt == NULL) { return; } /* * Quick scan to find escape sequences. * Escape is '&' followed by a letter (or a hash mark). * return if there are none. 
*/ ptr = txt; while (*ptr != '\0') { if ((*ptr == '&')&& ((isalpha((int)*(ptr + 1)))||(*(ptr + 1) == '#'))) { break; } ptr++; } if (*ptr == '\0') { return; } /* * Loop, replaceing escape sequences, and moving up remaining * text. */ ptr2 = ptr; while (*ptr != '\0') { unterminated = 0; space_terminated = 0; /* * Extract the escape sequence from start to ptr */ start = ptr; while ((*ptr != ';')&&(!isspace((int)*ptr))&&(*ptr != '\0')) { ptr++; } if (*ptr == '\0') { #ifndef DISABLE_TRACE if (htmlwTrace) { fprintf(stderr, "warning: unterminated & (%s)\n", start); } #endif unterminated = 1; } else if (isspace((int)*ptr)) { space_terminated = 1; } /* * Copy the escape sequence into a separate buffer. * Then clean spaces so the "& lt ;" = "<" etc. * The cleaning should be unnecessary. */ tchar = *ptr; *ptr = '\0'; text = (char *)malloc(strlen(start) + 1); if (text == NULL) { #ifndef DISABLE_TRACE if (htmlwTrace) { fprintf(stderr, "Cannot malloc space for & text\n"); } #endif *ptr = tchar; return; } strcpy(text, start); *ptr = tchar; clean_white_space(text); /* * Replace escape sequence with appropriate character */ val = ExpandEscapes(text, &tend, ((space_terminated << 1) + unterminated)); if (val != '\0') { if (unterminated) { tchar = *tend; *tend = '\0'; ptr = (char *)(start + strlen(text) - 1); *tend = tchar; } else if (space_terminated) { ptr--; } *ptr2 = val; unterminated = 0; space_terminated = 0; } /* * invalid escape sequence. skip it. */ else { #ifndef DISABLE_TRACE if (htmlwTrace) { fprintf(stderr, "Error bad & string\n"); } #endif ptr = start; *ptr2 = *ptr; } free(text); /* * Copy forward remaining text until you find the next * escape sequence */ ptr2++; ptr++; while (*ptr != '\0') { if ((*ptr == '&')&& ((isalpha((int)*(ptr + 1)))||(*(ptr + 1) == '#'))) { break; } *ptr2++ = *ptr++; } } *ptr2 = '\0'; } /* * Get a block of text from a HTML document. * All text from start to the end, or the first mark * (a mark is '<' or ''. 
From the text, determine * its type, and fill in a mark_up structure to return. Also returns * endp pointing to the ttrailing '>' in the original string. */ struct mark_up * get_mark(start, endp) char *start; char **endp; { char *ptr; char *text; char tchar; struct mark_up *mark; int comment=0; /* amb - comment==1 if we are in a comment */ char *first_gt=NULL; /* keep track of ">" for old broken comments */ if (start == NULL) { return(NULL); } if (*start != '<') { return(NULL); } /* amb - check if we are in a comment, start tag is