lynx-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: lynx-dev bug in dev25


From: Leonid Pauzner
Subject: Re: lynx-dev bug in dev25
Date: Wed, 9 Sep 1998 19:47:34 +0400 (MSD)

>      * From: address@hidden (Larry W. Virden)
>      * Date: Mon, 7 Sep 1998 11:31:46 -0400
>      * Reply-To: address@hidden
>      * Sender: address@hidden
>      _________________________________________________________________

> The code in UCGetLYhndl_byMIME() returns a -1 if the character set in
> question is not found.  Other code uses the return code value without
> ever looking for a bad value.  This causes a core dump in
>   [1] strncmp(0xffdbfda0, 0x240260, 0x10, 0x0, 0x49, 0x57657374), at 0xef5e40b0
> =>[2] HTMLSetRawModeDefault(i = -1), line 601 in "LYCharSets.c"
>   [3] character_set_fun(value = 0xefffdf55 "ISO Latin 1"), line 532 in "LYReadCFG.c"
>   [4] read_cfg(cfg_filename = 0x2d5e18 "/projects/intranet/lib/lynx//lynx.cfg", parent_filename = 0x2a426c "main program", nesting_level = 1), line 1095 in "LYReadCFG.c"
>   [5] main(argc = 1, argv = 0xefffe734), line 1243 in "LYMain.c"


Larry, try this patch to check whether it solves your problem or not
(applied against clean dev25, but it differs by one line in LYMain.c
because of my previous patch for hightext&partial mode):


* Chartrans recovery and cleanup: use 'safeUCGetLYhndl_byMIME()' when
reading charset info from lynx.cfg/userdefs.h/command line switches,
to recover from possible typos by falling back to ISO-8859-1; add comments
in UCMap.h; move the HTMLSetRawModeDefault() call into HTMLUseCharacterSet()
to reduce clutter. - LP




> --
> Larry W. Virden                 INET: address@hidden

> One more patch (against clean dev25 + my previous patch for partial mode):




diff -u old/lycharse.c ./lycharse.c
--- old/lycharse.c      Mon Sep  7 03:02:16 1998
+++ ./lycharse.c        Wed Sep  9 12:04:00 1998
@@ -591,7 +591,7 @@
  *  Function to set the LYDefaultRawMode value
  *  based on the selected character set. - FM
  */
-PUBLIC void HTMLSetRawModeDefault ARGS1(int,i)
+PRIVATE void HTMLSetRawModeDefault ARGS1(int,i)
 {
     if (!strncmp(LYchar_set_names[i], "Western (ISO-8859-1)", 20) ||
        !strncmp(LYchar_set_names[i], "Chinese", 7) ||
@@ -882,6 +882,7 @@
  */
 PUBLIC void HTMLUseCharacterSet ARGS1(int,i)
 {
+    HTMLSetRawModeDefault(i);
     p_entity_values = LYCharSets[i];
     HTMLSetCharacterHandling(i);     /* set LYRawMode and CJK attributes */
     HTMLSetHaveCJKCharacterSet(i);
diff -u old/lycharse.h ./lycharse.h
--- old/lycharse.h      Thu Aug  6 05:28:22 1998
+++ ./lycharse.h        Wed Sep  9 12:02:16 1998
@@ -31,7 +31,6 @@
 extern int LYNumCharsets;
 extern LYUCcharset LYCharSet_UC[];
 extern void HTMLSetCharacterHandling PARAMS((int i));
-extern void HTMLSetRawModeDefault PARAMS((int i));
 extern void HTMLSetUseDefaultRawMode PARAMS((int i, BOOLEAN modeflag));
 extern void HTMLSetHaveCJKCharacterSet PARAMS((int i));
 extern void HTMLUseCharacterSet PARAMS((int i));
diff -u old/lymain.c ./lymain.c
--- old/lymain.c        Tue Sep  8 14:53:04 1998
+++ ./lymain.c  Wed Sep  9 19:08:44 1998
@@ -755,19 +755,6 @@
     StrAllocCopy(URLDomainPrefixes, URL_DOMAIN_PREFIXES);
     StrAllocCopy(URLDomainSuffixes, URL_DOMAIN_SUFFIXES);
     StrAllocCopy(XLoadImageCommand, XLOADIMAGE_COMMAND);
-    /*
-     * Set up the compilation default character set. - FM
-     */
-    for (i = 0; LYchar_set_names[i]; i++) {
-       if (!strncmp(CHARACTER_SET, LYchar_set_names[i],
-                    strlen(CHARACTER_SET))) {
-           current_char_set=i;
-           break;
-       }
-    }
-    if (!LYchar_set_names[i])
-       current_char_set = i = 0;
-    HTMLSetRawModeDefault(i);

     /*
      * Disable news posting if the compilation-based
@@ -1160,6 +1147,10 @@
        fprintf(stderr, "\nLynx character sets not declared.\n\n");
        exit(-1);
     }
+    /*
+     * Set up the compilation default character set. - FM
+     */
+    current_char_set = safeUCGetLYhndl_byMIME(CHARACTER_SET);

 #if defined(USE_HASH)
     /*
@@ -1891,14 +1882,14 @@
        char **,                argv GCC_UNUSED,
        char *,                 next_arg)
 {
-    if (next_arg == 0) {
-       UCLYhndl_for_unspec = UCGetLYhndl_byMIME("iso-8859-1");
-    } else {
-       LYLowerCase(next_arg);
-       StrAllocCopy(UCAssume_MIMEcharset, next_arg);
-       if (UCAssume_MIMEcharset && *UCAssume_MIMEcharset)
-           UCLYhndl_for_unspec = UCGetLYhndl_byMIME(UCAssume_MIMEcharset);
-    }
+       UCLYhndl_for_unspec =
+                       safeUCGetLYhndl_byMIME(next_arg);
+       StrAllocCopy(UCAssume_MIMEcharset,
+                       LYCharSet_UC[UCLYhndl_for_unspec].MIMEname);
+/*        this may be a memory for bogus typo -
+          StrAllocCopy(UCAssume_MIMEcharset, next_arg);
+          LYLowerCase(UCAssume_MIMEcharset);   */
+
     return 0;
 }

@@ -1908,15 +1899,10 @@
        char **,                argv GCC_UNUSED,
        char *,                 next_arg)
 {
-    if (next_arg == 0) {
-       UCLYhndl_HTFile_for_unspec = UCGetLYhndl_byMIME("iso-8859-1");
-    } else {
-       LYLowerCase(next_arg);
-       StrAllocCopy(UCAssume_localMIMEcharset, next_arg);
-       if (UCAssume_localMIMEcharset && *UCAssume_localMIMEcharset)
-           UCLYhndl_HTFile_for_unspec =
-               UCGetLYhndl_byMIME(UCAssume_localMIMEcharset);
-    }
+       UCLYhndl_HTFile_for_unspec =
+                       safeUCGetLYhndl_byMIME(next_arg);
+       StrAllocCopy(UCAssume_localMIMEcharset,
+                       LYCharSet_UC[UCLYhndl_HTFile_for_unspec].MIMEname);
     return 0;
 }

@@ -1926,14 +1912,10 @@
        char **,                argv GCC_UNUSED,
        char *,                 next_arg)
 {
-    if (next_arg == 0) {
-       UCLYhndl_for_unrec = UCGetLYhndl_byMIME("iso-8859-1");
-    } else {
-       LYLowerCase(next_arg);
-       StrAllocCopy(UCAssume_unrecMIMEcharset, next_arg);
-       if (UCAssume_unrecMIMEcharset && *UCAssume_unrecMIMEcharset)
-           UCLYhndl_for_unrec = UCGetLYhndl_byMIME(UCAssume_unrecMIMEcharset);
-    }
+       UCLYhndl_for_unrec =
+                       safeUCGetLYhndl_byMIME(next_arg);
+       StrAllocCopy(UCAssume_unrecMIMEcharset,
+                       LYCharSet_UC[UCLYhndl_for_unrec].MIMEname);
     return 0;
 }

diff -u old/lyoption.c ./lyoption.c
--- old/lyoption.c      Mon Sep  7 03:02:16 1998
+++ ./lyoption.c        Wed Sep  9 12:05:50 1998
@@ -837,7 +837,6 @@
                 *  character set if changed. - FM
                 */
                if (CurrentCharSet != current_char_set) {
-                   HTMLSetRawModeDefault(current_char_set);
                    LYUseDefaultRawMode = TRUE;
                    HTMLUseCharacterSet(current_char_set);
                    CurrentCharSet = current_char_set;
@@ -3611,7 +3610,6 @@
                 *  Set the LYUseDefaultRawMode value and character
                 *  handling if LYRawMode was changed. - FM
                 */
-               HTMLSetRawModeDefault(current_char_set);
                LYUseDefaultRawMode = TRUE;
                HTMLUseCharacterSet(current_char_set);
            }
diff -u old/lyrcfile.c ./lyrcfile.c
--- old/lyrcfile.c      Mon Sep  7 03:02:16 1998
+++ ./lyrcfile.c        Wed Sep  9 12:06:22 1998
@@ -251,7 +251,6 @@
            for (; LYchar_set_names[i]; i++) {
                if (!strncmp(cp, LYchar_set_names[i], strlen(cp))) {
                    current_char_set=i;
-                   HTMLSetRawModeDefault(i);
                    break;
                }
            }
diff -u old/lyreadcf.c ./lyreadcf.c
--- old/lyreadcf.c      Mon Sep  7 03:02:16 1998
+++ ./lyreadcf.c        Wed Sep  9 19:09:34 1998
@@ -489,27 +489,34 @@
 static int assume_charset_fun ARGS1(
        char *,         value)
 {
-    StrAllocCopy(UCAssume_MIMEcharset, value);
-    LYLowerCase(UCAssume_MIMEcharset);
-    UCLYhndl_for_unspec = UCGetLYhndl_byMIME(UCAssume_MIMEcharset);
+    UCLYhndl_for_unspec =
+                       safeUCGetLYhndl_byMIME(value);
+    StrAllocCopy(UCAssume_MIMEcharset,
+                       LYCharSet_UC[UCLYhndl_for_unspec].MIMEname);
+/*    this may be a memory for bogus typo -
+      StrAllocCopy(UCAssume_MIMEcharset, value);
+      LYLowerCase(UCAssume_MIMEcharset);    */
+
     return 0;
 }

 static int assume_local_charset_fun ARGS1(
        char *,         value)
 {
-    StrAllocCopy(UCAssume_localMIMEcharset, value);
-    LYLowerCase(UCAssume_localMIMEcharset);
-    UCLYhndl_HTFile_for_unspec = UCGetLYhndl_byMIME(UCAssume_localMIMEcharset);
+    UCLYhndl_HTFile_for_unspec =
+                       safeUCGetLYhndl_byMIME(value);
+    StrAllocCopy(UCAssume_localMIMEcharset,
+                       LYCharSet_UC[UCLYhndl_HTFile_for_unspec].MIMEname);
     return 0;
 }

 static int assume_unrec_charset_fun ARGS1(
        char *,         value)
 {
-    StrAllocCopy(UCAssume_unrecMIMEcharset, value);
-    LYLowerCase(UCAssume_unrecMIMEcharset);
-    UCLYhndl_for_unrec = UCGetLYhndl_byMIME(UCAssume_unrecMIMEcharset);
+    UCLYhndl_for_unrec =
+                       safeUCGetLYhndl_byMIME(value);
+    StrAllocCopy(UCAssume_unrecMIMEcharset,
+                       LYCharSet_UC[UCLYhndl_for_unrec].MIMEname);
     return 0;
 }

@@ -523,13 +530,11 @@
     for (i = 0; LYchar_set_names[i]; i++) { /* search by name, compatibility */
        if (!strncmp(value, LYchar_set_names[i], len)) {
            current_char_set = i;
-           HTMLSetRawModeDefault(current_char_set);
            return 0;
        }
     }

-    current_char_set = UCGetLYhndl_byMIME(value); /* by MIME */
-    HTMLSetRawModeDefault(current_char_set);
+    current_char_set = safeUCGetLYhndl_byMIME(value); /* by MIME */
     return 0;
 }


diff -u old/ucdomap.c ./ucdomap.c
--- old/ucdomap.c       Thu Aug  6 05:28:22 1998
+++ ./ucdomap.c Wed Sep  9 16:23:18 1998
@@ -1502,17 +1502,23 @@
 }

 /*
- *  Currently the charset name has to match exactly -- not substring
- *  matching as was done before (see HTMIME.c, HTML.c).
+ *  Get Lynx internal charset handler from MIME name,
+ *  return -1 if we got NULL or not recognized value.
+ *  According to RFC, MIME headers should match case-insensitively.
  */
 PUBLIC int UCGetLYhndl_byMIME ARGS1(
-       CONST char *,   UC_MIMEcharset)
+       CONST char *,   value)
 {
   int i;
   int LYhndl = -1;
+  char *UC_MIMEcharset = NULL;

-    if (!UC_MIMEcharset || !(*UC_MIMEcharset))
+    if (!value || !(*value))
        return -1;
+
+
+    StrAllocCopy(UC_MIMEcharset, value);
+    LYLowerCase(UC_MIMEcharset);

     for (i = 0;
         (i < MAXCHARSETS && i < LYNumCharsets &&
@@ -2058,4 +2064,18 @@
  *  To add synonyms for any charset name
  *  check function UCGetLYhndl_byMIME in this file.
  */
+}
+
+/*
+ *  Safe variant of UCGetLYhndl_byMIME, with blind recovery from typo
+ *  in user input: lynx.cfg, userdefs.h, switches from command line.
+ */
+PUBLIC int safeUCGetLYhndl_byMIME ARGS1 (CONST char *, value)
+{
+    int i = UCGetLYhndl_byMIME(value);
+
+    if (i == -1)       /* was user's typo or not yet recognized value */
+       i = LATIN1;     /* error recovery? */
+
+    return(i);
 }
Only in .: ucdomap.old
diff -u old/ucmap.h ./ucmap.h
--- old/ucmap.h Thu Aug  6 05:28:22 1998
+++ ./ucmap.h   Wed Sep  9 19:04:14 1998
@@ -33,15 +33,50 @@
 extern UCode_t UCTransToUni PARAMS((
        char            ch_in,
        int             charset_in));
-extern int UCGetLYhndl_byMIME PARAMS((
-       CONST char *    p));
 extern int UCGetRawUniMode_byLYhndl PARAMS((
        int             i));
+extern int UCGetLYhndl_byMIME PARAMS((
+       CONST char *    p)); /* returns -1 if MIME name not recognized */
+extern int safeUCGetLYhndl_byMIME PARAMS((
+       CONST char *    p)); /* returns LATIN1 if MIME name not recognized */

 extern int UCLYhndl_for_unspec;
 extern int UCLYhndl_for_unrec;
 extern int UCLYhndl_HTFile_for_unspec;
 extern int UCLYhndl_HTFile_for_unrec;
+
+/* easy to type: */
+#define LATIN1   UCGetLYhndl_byMIME("iso-8859-1")
+
+/*
+In general,  Lynx translate letters from document charset to display charset.
+If document charset not specified or not recognized by Lynx, we fallback
+to different assumptions below, read also lynx.cfg for info.
+
+UCLYhndl_for_unspec -  assume this as charset for documents that don't
+                       specify a charset parameter in HTTP headers or via META
+                       this corresponds to "assume_charset"
+
+UCLYhndl_HTFile_for_unspec -  assume this as charset of local file
+                       this corresponds to "assume_local_charset"
+
+UCLYhndl_for_unrec  -  in case a charset parameter is not recognized;
+                       this corresponds to "assume_unrec_charset"
+
+UCLYhndl_HTFile_for_unrec  - the same but only for local files,
+                             currently not used.
+
+
+current_char_set  -    this corresponds to "display charset",
+                       declared in LYCharSets.c and really important.
+
+All external charset information available in so called MIME format.
+For internal needs Lynx use charset handlers as integers
+from UCGetLYhndl_byMIME().  However, there is no way to recover
+from users's error in configuration file lynx.cfg or command line switches,
+those unrecognized MIME names assumed as LATIN1 (via safeUCGetLYhndl...).
+*/
+

 #define UCTRANS_NOTFOUND (-4)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]