/*------------------------------------------------------------------------- iso2022.c - Code for the ISO-2022 specific parts of the richtext processor. Copyright (c) 1992 Rhys Weatherley Permission to use, copy, modify, and distribute this material for any purpose and without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies, and that the name of Rhys Weatherley not be used in advertising or publicity pertaining to this material without specific, prior written permission. RHYS WEATHERLEY MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. Revision History: ================ Version DD/MM/YY By Description ------- -------- -- -------------------------------------- 1.0 21/06/92 RW Original Version of iso2022.c You may contact the author by: ============================= e-mail: rhys@cs.uq.oz.au mail: Rhys Weatherley 5 Horizon Drive Jamboree Heights Queensland 4074 Australia Acknowledgements: ================ Many thanks to Yutaka Sato (ysato@etl.go.jp) for protyping ISO-2022 support in a previous version of richtext, on whose code this is based. -------------------------------------------------------------------------*/ #include #include #include #include "richlex.h" #include "richset.h" extern void controloutput(); extern int controlputc(); /* * Global data for this module. */ static int SwToAscii; static int SwToOther; static int OutPrevChar; static int OutPrevPrevChar; static int OutCharLen; static int OutCharSet; static int OutAsciiMode; #define OUT_ASCII 0 #define OUT_JIS 1 #define OUT_KSC 2 #define ESC 033 #define SO 016 #define SI 017 #define ISO_GENERIC_PREFIX "x-iso-2022-gen-" #define ISO_GENERIC_LEN 15 #define ISO_SHIFT_PREFIX "x-iso-shift-" #define ISO_SHIFT_LEN 12 #define ISO_CHARSET_PREFIX "x-iso-charset-" #define ISO_CHARSET_LEN 14 /* * Initialise the ISO-2022 character set processor. */ void iso2022_init (name) char *name; { SwToAscii = 'B'; SwToOther = 'B'; OutPrevChar = 0; OutPrevPrevChar = 0; OutCharLen = 1; OutCharSet = OUT_ASCII; OutAsciiMode = RICH_ENC_US_ASCII; if (!name) return; if (!strncmp (name,"iso-2022-jp",11)) richtextencoding (RICH_ENC_JP_ASCII); else if (!strncmp (name,"iso-2022-kr",11)) { richtextencoding (RICH_ENC_KR_ASCII); controloutput("\033$)C",0); } else richtextencoding (RICH_ENC_US_ASCII); } /* * Process a command for the ISO-2022 processor. */ int iso2022_command (token,negated) char *token; int negated; { int swchar; if (!strcmp(token,"iso-2022-jp")) { if (negated) { /* Return to previous output mode */ controloutput("\033(B",0); SwToAscii = 'B'; charsetpop(&iso2022_charset); } else { /* Enter JIS-X0208-1983 output mode */ charsetpush(&iso2022_charset); richtextencoding(RICH_ENC_US_ASCII); controloutput("\033(B",0); SwToOther = 'B'; } return (1); } else if (!strcmp(token,"iso-2022-kr")) { if (negated) { /* Return to previous output mode */ controloutput("\017",0); charsetpop(&iso2022_charset); } else { /* Enter KSC-5601 output mode */ charsetpush(&iso2022_charset); richtextencoding(RICH_ENC_KR_ASCII); controloutput("\033$)C\017",0); } return (1); } else if (!strncmp(token,"x-jis-x0201",11)) { if (negated) { /* Return to previous output mode */ controloutput("\033(B",0); SwToAscii = 'B'; charsetpop(&iso2022_charset); } else { /* Enter JIS-X-0201-1976 output mode */ charsetpush(&iso2022_charset); richtextencoding(RICH_ENC_JP_ASCII); controloutput("\033(J",0); SwToAscii = 'J'; } return (1); } else if (!strncmp(token,"x-jis-x0208",11)) { if (negated) { /* Return to previous output mode */ controloutput("\033(B",0); SwToAscii = 'B'; charsetpop(&iso2022_charset); } else { /* Enter JIS-X0208-* output mode */ charsetpush(&iso2022_charset); if (!strcmp (token + 11,"-1978")) SwToOther = '@'; else SwToOther = 'B'; richtextencoding((SwToOther == '@' ? RICH_ENC_JIS_1978 : RICH_ENC_JIS_1983)); controloutput("\033$",0); controlputc(SwToOther); } return (1); } else if (!strcmp(token,"x-ksc-5601")) { if (negated) { /* Return to previous output mode */ controloutput("\017",0); charsetpop(&iso2022_charset); } else { /* Enter KSC-5601 output mode */ charsetpush(&iso2022_charset); richtextencoding(RICH_ENC_KSC_5601); controloutput("\033$)C\016",0); } return (1); } else if (!strncmp (token,ISO_GENERIC_PREFIX,ISO_GENERIC_LEN)) { /* Process an escape sequence for changing character sets */ sscanf(token + ISO_GENERIC_LEN,"%x",&swchar); if (swchar <= ' ' || swchar >= 0x7F) return (1); if (negated) { /* Return to previous output mode from multi-byte mode */ if (!charsettop(&iso2022_charset)) charsetpush(&iso2022_charset); richtextencoding((swchar == 'J' ? RICH_ENC_JP_ASCII : RICH_ENC_US_ASCII)); controloutput("\033(",0); controlputc(swchar); SwToAscii = swchar; } else { /* Enter multi-byte (Japanese) mode */ if (!charsettop(&iso2022_charset)) charsetpush(&iso2022_charset); richtextencoding((swchar == '@' ? RICH_ENC_JIS_1978 : RICH_ENC_JIS_1983)); controloutput("\033$",0); controlputc(swchar); SwToOther = swchar; } return (1); } else if (!strncmp (token,ISO_SHIFT_PREFIX,ISO_SHIFT_LEN)) { /* Process a character set shift sequence */ if (!strcmp(token + ISO_SHIFT_LEN,"out") && RichtextCharEncoding == RICH_ENC_KR_ASCII) { /* Enter KSC-5601 2-byte mode */ if (!charsettop(&iso2022_charset)) charsetpush(&iso2022_charset); richtextencoding(RICH_ENC_KSC_5601); controloutput("\016",0); } else if (!strcmp (token + ISO_SHIFT_LEN,"in") && RichtextCharEncoding == RICH_ENC_KSC_5601) { /* Return to US-ASCII from KSC-5601 */ if (!charsettop(&iso2022_charset)) charsetpush(&iso2022_charset); richtextencoding(RICH_ENC_KR_ASCII); controloutput("\017",0); } return (1); } else { return (0); } } /* * Check for singleton ISO-2022 tokens. */ int iso2022_single (token) char *token; { return (!strncmp (token,ISO_GENERIC_PREFIX,ISO_GENERIC_LEN) || !strncmp (token,ISO_SHIFT_PREFIX,ISO_SHIFT_LEN) || !strncmp (token,ISO_CHARSET_PREFIX,ISO_CHARSET_LEN)); } /* * Determine the width of a ISO-2022 character. */ int iso2022_width (ch) RCHAR ch; { return (ch & 0xFF00 ? 2 : 1); } /* * Determine if the current character can be used as a folding point. */ int iso2022_fold (ch) RCHAR ch; { if (ch < 0x7F && isspace (ch)) { return (1); } else { return ((ch & 0xFF00) != 0); } } /* * Render the given ISO-2022 character. */ int iso2022_render (ch,param) RCHAR ch; void *param; { if (ch & 0xFF00) { if (OutCharLen < 2) { /* Add extra escape sequences after stray ASCII characters */ /* This normally happens in excerpts and signatures, etc. */ if (OutAsciiMode == RICH_ENC_JP_ASCII) { (*RichtextPutc) (033,param); (*RichtextPutc) ('$',param); (*RichtextPutc) (SwToOther,param); } else if (OutAsciiMode == RICH_ENC_KR_ASCII) { (*RichtextPutc) (SO,param); } } (*RichtextPutc) ((int)((ch & 0xFF00) >> 8),param); return (*RichtextPutc) ((int)(ch & 0xFF),param); } else { if (OutCharLen > 1 && ch >= 0x20) { /* Add extra escape sequences before stray ASCII characters */ /* This normally happens in excerpts and signatures, etc. */ if (OutAsciiMode == RICH_ENC_JP_ASCII) { (*RichtextPutc) (033,param); (*RichtextPutc) ('(',param); (*RichtextPutc) (SwToAscii,param); } else if (OutAsciiMode == RICH_ENC_KR_ASCII) { (*RichtextPutc) (SI,param); } } return (*RichtextPutc) ((int)(ch & 0xFF),param); } } /* * Enter or leave the ISO-2022 encoding. */ int iso2022_encoding (newenc) int newenc; { switch (newenc) { case RICH_ENC_US_ASCII: controloutput("\033(B",0); break; case RICH_ENC_JP_ASCII: controloutput("\033(J",0); break; case RICH_ENC_KR_ASCII: controloutput("\017",0); break; case RICH_ENC_JIS_1978: controloutput("\033$@",0); break; case RICH_ENC_JIS_1983: controloutput("\033$B",0); break; case RICH_ENC_KSC_5601: controloutput("\016",0); break; default: controloutput("\033(B",0); break; } return 0; } /* * Define the ISO-2022-JP and ISO-2022-KR character set processor. */ struct charsetproc iso2022_charset = {"iso-2022-jp:iso-2022-kr", iso2022_init, iso2022_command, iso2022_single, iso2022_width, iso2022_fold, iso2022_render, iso2022_encoding}; /* * Define an output routine for slotting into RichtextPutc so * that ISO-2022 escape sequences are treated correctly. */ int iso2022_fputc (ch,file) int ch; FILE *file; { if (OutPrevChar == ESC && ch == '(') { /* Process escape sequences that end JIS 2-byte modes */ OutCharLen = 1; OutCharSet = OUT_ASCII; OutAsciiMode = RICH_ENC_JP_ASCII; } if (OutPrevPrevChar == ESC && OutPrevChar == '$') { /* Process escape sequences that start JIS 2-byte modes */ if (ch != ')') { /* ')' is for Korean, so ignore it */ OutCharLen = 2; OutCharSet = OUT_JIS; OutAsciiMode = RICH_ENC_JP_ASCII; } } if (ch == SO) { OutCharLen = 2; OutCharSet = OUT_KSC; OutAsciiMode = RICH_ENC_KR_ASCII; } else if (ch == SI) { OutCharLen = 1; OutCharSet = OUT_ASCII; OutAsciiMode = RICH_ENC_KR_ASCII; } if (ch == '\n' && OutCharLen == 2) { /* Two-byte characters cannot cross line boundaries */ if (OutCharSet == OUT_JIS) { fputc (ESC,file); fputc ('(',file); fputc (SwToAscii,file); fputc ('\n',file); fputc (ESC,file); fputc ('$',file); fputc (SwToOther,file); } else if (OutCharSet == OUT_KSC) { fputc (SI,file); fputc ('\n',file); fputc (SO,file); } } else { fputc (ch,file); } OutPrevPrevChar = OutPrevChar; OutPrevChar = ch; return 0; }