package molly.pages;

import jakarta.servlet.*;
import jakarta.servlet.http.*;
import java.io.*;
import java.util.*;
import fc.web.page.PageServlet;

public class test_realworld_5 extends fc.web.page.PageImpl
{
public void render(HttpServletRequest req, HttpServletResponse res) throws Exception
	{
	/* for people used to typing 'request/response' */
	final HttpServletRequest  request = req;
	final HttpServletResponse response = res;

	res.setContentType("text/html; charset=UTF-8");
	PrintWriter out = res.getWriter();
	out.print  ("<!DOCTYPE HTML SYSTEM \"http://jkorpela.fi/html/loosewbr.dtd\">\n");
	out.print  ("\n");
	out.print  ("<HTML LANG=\"en\">\n");
	out.print  ("\n");
	out.print  ("<HEAD>\n");
	out.print  (" <meta http-equiv=\"Content-Type\" content=\"text/html;charset=iso-8859-1\">\n");
	out.print  ("<meta name=\"viewport\" content=\"initial-scale=1.0\">\n");
	out.print  (" <link rel=\"stylesheet\" type=\"text/css\"  title=\"Yucca's style\" href=\"basic.css\">\n");
	out.print  ("<TITLE>\n");
	out.print  ("Character code tutorial\n");
	out.print  ("</TITLE>\n");
	out.print  ("<META NAME=\"description\" CONTENT=\n");
	out.print  ("\"A tutorial on character code issues in digital processing and\n");
	out.print  ("transfer of text data (on the Internet or otherwise).\n");
	out.print  ("This document tries to clarify the concepts of character repertoire,\n");
	out.print  ("character code, and character encoding (avoiding the term character set,\n");
	out.print  ("which is used confusingly).\n");
	out.print  ("ASCII, ISO 646, ISO 8859 (ISO Latin), Windows character set,\n");
	out.print  ("ISO 10646 (UCS), Unicode,\n");
	out.print  ("UTF-8, and UTF-7 are used as examples.\"\n");
	out.print  (">\n");
	out.print  ("<!--LINK REL=\"alternate\" MEDIA=\"print\" TYPE=\"application/postscript\"\n");
	out.print  (" HREF=\"chars.ps\"-->\n");
	out.print  ("<style type=\"text/css\">\n");
	out.print  (".minitoc {width:25em; font-size: 90%; }\n");
	out.print  ("p.summary { font-size: 100%; margin-left: 0; }\n");
	out.print  ("</style>\n");
	out.print  ("</HEAD>\n");
	out.print  ("\n");
	out.print  ("<BODY>\n");
	out.print  ("<H1>A tutorial on character code issues</H1>\n");
	out.print  ("\n");
	out.print  ("<div class=\"minitoc\">\n");
	out.print  ("<H2 class=\"toc\"><a name=\"toc\">Contents</a></H2>\n");
	out.print  ("<UL class=\"toc\">\n");
	out.print  ("<LI><A HREF=\"#basics\">The basics</A></LI>\n");
	out.print  ("<LI><A HREF=\"#defs\">Definitions</A>:\n");
	out.print  ("<A HREF=\"#repertoire\">character repertoire</A>,\n");
	out.print  ("<A HREF=\"#code\">character code</A>,\n");
	out.print  ("<A HREF=\"#encoding\">character encoding</A>\n");
	out.print  ("     </LI>\n");
	out.print  ("<li><a href=\"#examples\">Examples of character codes</a>\n");
	out.print  ("<ul>\n");
	out.print  ("<LI><A HREF=\"#ascii\">Good old ASCII</A></LI>\n");
	out.print  ("<LI><A HREF=\"#latin1\">Another example: ISO Latin&nbsp;1 alias ISO 8859-1</A></LI>\n");
	out.print  ("<LI><A HREF=\"#win\">More examples: the Windows character <nobr>set(s)</nobr></A></LI>\n");
	out.print  ("<LI><A HREF=\"#asciiext\">The ISO 8859 family</a></li>\n");
	out.print  ("<li><a href=\"#other-extensions-to-ascii\">Other \"extensions to ASCII\"</A></LI>\n");
	out.print  ("<li><a href=\"#8bit\">Other \"8-bit codes\"</a></li>\n");
	out.print  ("<LI><A HREF=\"#10646\">ISO 10646 (UCS) and Unicode</A></LI>\n");
	out.print  ("</ul></li>\n");
	out.print  ("<LI><A HREF=\"#characters\">More about the character concept</A>\n");
	out.print  ("<ul>\n");
	out.print  ("<li><a href=\"#unicode-view\">The Unicode view</a></li>\n");
	out.print  ("<li><a href=\"#control\">Control characters (control codes)</a></li>\n");
	out.print  ("<li><a href=\"#glyph\">A glyph - a visual appearance</a></li>\n");
	out.print  ("<li><a href=\"#names\">What's in a name?</a></li>\n");
	out.print  ("<li><a href=\"#repdef\">Glyph variation</a></li>\n");
	out.print  ("<li><a href=\"#font\">Fonts</a></li>\n");
	out.print  ("<li><a href=\"#identity\">Identity of characters: a matter of definition</a></li>\n");
	out.print  ("<li><a href=\"#display\">Failures to display a character</a></li>\n");
	out.print  ("<li><a href=\"#math\">Linear text vs. mathematical notations</a></li>\n");
	out.print  ("<li><a href=\"#compat\">Compatibility characters</a></li>\n");
	out.print  ("<li><a href=\"#compose\">Compositions and decompositions</a></li>\n");
	out.print  ("</ul>\n");
	out.print  ("</LI>\n");
	out.print  ("<LI><A HREF=\"#typing\">Typing characters</A>\n");
	out.print  ("<ul>\n");
	out.print  ("<li><a href=\"#key\">Just pressing a key?</a></li>\n");
	out.print  ("<li><a href=\"#pgm\">Program-specific methods for typing characters</a></li>\n");
	out.print  ("<li><a href=\"#esc\">\"Escape\" notations (\"meta notations\") for characters</a></li>\n");
	out.print  ("<li><a href=\"#identify\">How to mention (identify) a character</a></li>\n");
	out.print  ("</ul>  \n");
	out.print  ("     </LI>\n");
	out.print  ("<LI><A HREF=\"#encinfo\">Information about encoding</A>\n");
	out.print  (" <UL>\n");
	out.print  (" <LI><A HREF=\"#whyencinfo\">The need for information about encoding</A></LI>\n");
	out.print  (" <LI><A HREF=\"#MIME\">The MIME solution</A></LI>\n");
	out.print  (" <LI><A HREF=\"#qp\">An auxiliary encoding: Quoted-Printable (QP)</A></LI>\n");
	out.print  (" <LI><A HREF=\"#MIMEatwork\">How MIME should work in practice</A></LI>\n");
	out.print  (" <LI><A HREF=\"#problems\">Problems with implementations - examples</A></LI>\n");
	out.print  (" </UL>\n");
	out.print  (" </LI>\n");
	out.print  ("<LI><A HREF=\"#concl\">Practical conclusions</A></LI>\n");
	out.print  ("<LI><A HREF=\"#more\">Further reading</A></LI>\n");
	out.print  ("</UL>\n");
	out.print  ("</div>\n");
	out.print  ("\n");
	out.print  ("<P CLASS=\"summary\">\n");
	out.print  ("This document tries to clarify the concepts of\n");
	out.print  ("<em><a href=\"#repertoire\">character repertoire</a></em>,\n");
	out.print  ("<em><a href=\"#code\">character code</a></em>, and\n");
	out.print  ("<em><a href=\"#encoding\">character encoding</a></em>\n");
	out.print  ("especially in the Internet context.\n");
	out.print  ("It specifically avoids the term\n");
	out.print  ("<em>character set</em>, which\n");
	out.print  ("is confusingly used to denote repertoire\n");
	out.print  ("<em>or</em>\n");
	out.print  ("code <em>or</em> encoding.\n");
	out.print  ("<A HREF=\"#ascii\">ASCII</A>,\n");
	out.print  ("<A HREF=\"#646\">ISO 646</A>,\n");
	out.print  ("<a href=\"#8859\">ISO 8859</a> (ISO Latin, especially\n");
	out.print  ("<a href=\"#latin1\">ISO Latin 1</a>),\n");
	out.print  ("<a href=\"#win\">Windows character set</a>,\n");
	out.print  ("<A HREF=\"#10646\">ISO 10646, UCS, and Unicode</A>,\n");
	out.print  ("<a href=\"#utf\">UTF-8, UTF-7</a>,\n");
	out.print  ("<A TITLE=\"Multipurpose Internet Mail Extensions\" HREF=\"#MIME\">MIME</A>, and\n");
	out.print  ("<A TITLE=\"Quoted-Printable encoding\" HREF=\"#qp\">QP</A>\n");
	out.print  ("are used as examples.\n");
	out.print  ("This document in itself does not contain solutions to practical\n");
	out.print  ("problems with character codes\n");
	out.print  ("(but see section\n");
	out.print  ("<a href=\"#more\"><cite>Further reading</cite></A>).\n");
	out.print  ("Rather, it gives background\n");
	out.print  ("information needed for understanding what solutions there might be,\n");
	out.print  ("what the different solutions do - and what's really the problem\n");
	out.print  ("in the first place.\n");
	out.print  ("</P>\n");
	out.print  ("<!--\n");
	out.print  ("[P class=\"noprint\"]\n");
	out.print  ("This document is also available\n");
	out.print  ("[A TITLE=\n");
	out.print  ("'The PostScript version of \"A tutorial on character code issues\"'\n");
	out.print  ("HREF=\"chars.ps\"]\n");
	out.print  ("in Postscript form[/A]\n");
	out.print  ("(generated by\n");
	out.print  ("[A TITLE=\n");
	out.print  ("\"info about html2ps, a Perl script for converting HTML to PostScript\"\n");
	out.print  (" HREF=\"http://www.tdb.uu.se/%7ejan/html2ps.html\"]html2ps[/A]).[/P]-->\n");
	out.print  ("\n");
	out.print  ("<p>If you are looking for some quick help in using a large character repertoire\n");
	out.print  ("in HTML authoring, see the document\n");
	out.print  ("<a href=\"html/chars.html\"><cite>Using national and special\n");
	out.print  ("characters in HTML</cite></a>.</p>\n");
	out.print  ("<P>\n");
	out.print  ("Several <EM>technical terms</EM> related to character sets\n");
	out.print  ("(e.g. glyph, encoding) can be difficult to understand, due to various\n");
	out.print  ("confusions and due to having different names in different languages\n");
	out.print  ("and contexts. The\n");
	out.print  ("<a href=\n");
	out.print  ("http://iate.europa.eu\n");
	out.print  ("title=\"InterActive Terminology for Europe\"\n");
	out.print  (">IATE</a>\n");
	out.print  ("online database can be useful:\n");
	out.print  ("it contains translations and definitions for several technical\n");
	out.print  ("terms used here.\n");
	out.print  ("\n");
	out.print  ("<!--\n");
	out.print  ("<span class=\"noprint\">You may wish to use the following simplified search form\n");
	out.print  ("to access EuroDicAutom:</span></p>\n");
	out.print  ("\n");
	out.print  ("<form lang=\"en\"\n");
	out.print  (" name=\"EXPERT\" method=\"POST\" class=\"noprint\"\n");
	out.print  (" action=\"http://europa.eu.int/eurodicautom/Controller\">\n");
	out.print  ("<div class=\"noprint\">Looking for equivalents of\n");
	out.print  ("<input type=\"text\" name=\"query\" maxLength=\"255\" size=\"30\" value=\"\">\n");
	out.print  ("<input type=\"hidden\" NAME=\"display_term\" VALUE=\"HITALL\">\n");
	out.print  ("<input type=\"hidden\" name=\"CURRENTSCREEN\" value=\"EXPERT2\">\n");
	out.print  ("<input type=\"hidden\" name=\"source\" value=\"EN\">\n");
	out.print  ("in\n");
	out.print  ("<select name=\"target\" size=\"1\" multiple>\n");
	out.print  ("<option value=\"S\" selected>all available languages</option> \n");
	out.print  ("<option value=\"DA\"> Danish (DA)</option><option value=\"NL\"> Dutch (NL)</option><option value=\"EN\"> English (EN)</option><option value=\"FI\"> Finnish (FI)</option><option value=\"FR\"> French (FR)</option><option value=\"DE\"> German (DE)</option><option value=\"EL\"> Greek (EL)</option><option value=\"IT\"> Italian (IT)</option><option value=\"LA\"> Latin (LA)</option><option value=\"PT\"> Portuguese (PT)</option><option value=\"ES\"> Spanish (ES)</option><option value=\"SV\"> Swedish (SV)</option>\n");
	out.print  ("	    </select>\n");
	out.print  ("<input type=\"submit\" value=\"Search\"></div>\n");
	out.print  ("</form>\n");
	out.print  ("-->\n");
	out.print  ("\n");
	out.print  ("<H2><A NAME=\"basics\">The basics</A></H2>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("<A NAME=\"octet\">In computers and in data transmission between them,\n");
	out.print  ("i.e. in digital data processing and transfer,\n");
	out.print  ("data is internally presented as octets, as a rule.\n");
	out.print  ("An <DFN>octet</DFN> is a small unit of data with a numerical\n");
	out.print  ("value between 0 and 255, inclusively.</A>\n");
	out.print  ("The numerical values are presented in the normal (decimal) notation\n");
	out.print  ("here, but notice that other presentations are used too, especially\n");
	out.print  ("<A TITLE=\n");
	out.print  ("'description of octal notation (in Free On-line Dictionary of Computing)'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=octal&amp;action=Search\"\n");
	out.print  (">octal</A>\n");
	out.print  ("(base 8) or\n");
	out.print  ("<A TITLE=\n");
	out.print  ("'description of hexadecimal notation (in Free On-line Dictionary of Computing)'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=hexadecimal&amp;action=Search\"\n");
	out.print  (">hexadecimal</A>\n");
	out.print  ("(base 16) notation.\n");
	out.print  ("Octets are often called <DFN>bytes</DFN>, but in principle,\n");
	out.print  ("octet is a more definite concept than\n");
	out.print  ("<A TITLE='definition of \"byte\" in Free On-line Dictionary of Computing'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=byte&amp;action=Search\"\n");
	out.print  (">byte</A>.\n");
	out.print  ("Internally, octets consist of eight\n");
	out.print  ("<A TITLE='definition of \"bit\" in Free On-line Dictionary of Computing'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=bit&amp;action=Search\"\n");
	out.print  (">bit</A>s\n");
	out.print  ("(hence the name, from\n");
	out.print  ("Latin <I LANG=\"la\">octo</I> 'eight'), but we need not go\n");
	out.print  ("into bit level here. However, you might need to know what\n");
	out.print  ("the phrase \"first bit set\" or \"sign bit set\" means, since it\n");
	out.print  ("is often used. In terms of numerical values of octets, it means\n");
	out.print  ("that the value is greater than 127. In various contexts, such  \n");
	out.print  ("octets are sometimes interpreted as <em>negative</em> numbers,\n");
	out.print  ("and this may cause various problems.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Different conventions can be established as regards to how\n");
	out.print  ("an octet\n");
	out.print  ("or a sequence of octets\n");
	out.print  ("presents some data.\n");
	out.print  ("For instance, four consecutive octets often form a unit\n");
	out.print  ("that presents a real number according to a specific standard.\n");
	out.print  ("We are here interested in the presentation of\n");
	out.print  ("character data (or string data; a <DFN>string</DFN> is a\n");
	out.print  ("sequence of characters) only.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("In the simplest case, which is still widely used,\n");
	out.print  ("one octet corresponds to one character according to some\n");
	out.print  ("mapping table (encoding).\n");
	out.print  ("Naturally, this allows at most 256 different\n");
	out.print  ("characters being represented.\n");
	out.print  ("There are several different\n");
	out.print  ("encodings, such as the well-known\n");
	out.print  ("<A HREF=\"#ascii\">ASCII</A>\n");
	out.print  ("encoding and\n");
	out.print  ("the\n");
	out.print  ("<A HREF=\"#8859\">ISO Latin family</A>\n");
	out.print  ("of encodings.\n");
	out.print  ("The correct interpretation and processing of character data\n");
	out.print  ("of course requires knowledge about the encoding used.\n");
	out.print  ("For HTML documents, such information should be sent by the\n");
	out.print  ("Web server along with the document itself, using so-called\n");
	out.print  ("<a href=\"http.html\"\n");
	out.print  ("title=\"Quick reference to HTTP headers\"\n");
	out.print  ("><ABBR TITLE=\"Hypertext Transfer Protocol\">HTTP</ABBR>\n");
	out.print  ("headers</a>\n");
	out.print  ("(cf. to <A HREF=\"#headers\">MIME headers</A>).\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Previously the\n");
	out.print  ("<A HREF=\"#ascii\">ASCII</A>\n");
	out.print  ("encoding was usually\n");
	out.print  ("assumed by default (and it is still very common).\n");
	out.print  ("Nowadays\n");
	out.print  ("<a href=\"#latin1\">ISO Latin&nbsp;1</a>,\n");
	out.print  ("which can be regarded as an <a href=\"#ascii8\">extension of ASCII</a>,\n");
	out.print  ("is often the default. The current trend is to avoid giving\n");
	out.print  ("such a special position to ISO Latin&nbsp;1 among the variety\n");
	out.print  ("of encodings.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<H2><A NAME=\"defs\">Definitions</A></H2>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("The following definitions are not universally accepted and\n");
	out.print  ("used. In fact, one of the greatest causes of confusion\n");
	out.print  ("around character set issues is that terminology varies and\n");
	out.print  ("is sometimes misleading.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<DL>\n");
	out.print  ("<DT>\n");
	out.print  ("<A NAME=\"repertoire\">character repertoire</A>\n");
	out.print  ("</DT><DD>\n");
	out.print  ("A set of distinct characters.\n");
	out.print  ("No specific internal presentation\n");
	out.print  ("in computers or data transfer\n");
	out.print  ("is assumed. The repertoire per se does not even define an ordering for the \n");
	out.print  ("characters; ordering for sorting and other purposes is to be specified separately.\n");
	out.print  ("A character repertoire is usually defined by\n");
	out.print  ("specifying <a href=\"#names\">names</a> of characters and a sample (or reference)\n");
	out.print  ("presentation of characters in visible form.\n");
	out.print  ("Notice that a character repertoire may contain characters\n");
	out.print  ("which <EM>look</EM> the same in some presentations but are\n");
	out.print  ("regarded as logically distinct, such as Latin uppercase A,\n");
	out.print  ("Cyrillic uppercase A, and\n");
	out.print  ("Greek uppercase alpha. For more about this, see\n");
	out.print  ("<A HREF=\"#characters\">a discussion of the character concept</A>\n");
	out.print  ("later in this document.\n");
	out.print  ("</DD>\n");
	out.print  ("<DT>\n");
	out.print  ("<A NAME=\"code\">character code</A>\n");
	out.print  ("</DT><DD>\n");
	out.print  ("A mapping, often presented in tabular form, which defines\n");
	out.print  ("a one-to-one correspondence between characters in a character\n");
	out.print  ("<a href=\"#repertoire\">repertoire</a>\n");
	out.print  ("     and a set of nonnegative integers. That is,\n");
	out.print  ("it assigns a unique numerical code,\n");
	out.print  ("a <DFN>code position</DFN>,\n");
	out.print  ("to each character in the\n");
	out.print  ("repertoire.\n");
	out.print  ("In addition to being often presented as one or more tables,\n");
	out.print  ("the code as a whole can be regarded as a single table and the code\n");
	out.print  ("     positions as indexes.\n");
	out.print  ("As synonyms for \"code position\", the following terms are also in use:\n");
	out.print  ("<DFN>code number</DFN>,\n");
	out.print  ("<DFN>code value</DFN>,\n");
	out.print  ("<DFN>code element</DFN>,\n");
	out.print  ("<DFN>code point</DFN>,\n");
	out.print  ("<DFN>code set value</DFN>&nbsp;- and just <dfn>code</dfn>.\n");
	out.print  ("Note: The set of nonnegative integers corresponding to\n");
	out.print  ("characters need not\n");
	out.print  ("consist of consecutive numbers; in fact, most character codes\n");
	out.print  ("have \"holes\", such as code positions reserved for\n");
	out.print  ("     <a href=\"#control\">control functions</a>\n");
	out.print  ("     or for eventual future use to be defined later.\n");
	out.print  ("</DD>\n");
	out.print  ("<DT>\n");
	out.print  ("<A NAME=\"encoding\">character encoding</A>\n");
	out.print  ("</DT><DD>\n");
	out.print  ("A method (algorithm) for presenting characters in digital form\n");
	out.print  ("by mapping sequences of\n");
	out.print  ("<a href=\"#code\">code numbers</a> of characters into\n");
	out.print  ("sequences of <a href=\"#octet\">octets</a>.\n");
	out.print  ("     In the simplest case, each character is\n");
	out.print  ("mapped to an integer in the range 0 - 255 according to\n");
	out.print  ("a character code and these are used as such as octets.\n");
	out.print  ("Naturally, this only works for character\n");
	out.print  ("<a href=\"#repertoire\">repertoire</a>s with at most 256\n");
	out.print  ("characters. For larger sets, more complicated encodings are needed.\n");
	out.print  ("<a href=\"#charsetreg\">Encodings have names, which can be registered</a>.\n");
	out.print  ("</DD>\n");
	out.print  ("</DL>\n");
	out.print  ("<P>\n");
	out.print  ("Notice that a character code assumes or implicitly defines\n");
	out.print  ("a character repertoire.\n");
	out.print  ("A character encoding could, in principle, be viewed purely as\n");
	out.print  ("a method of mapping a sequence of integers to a sequence of octets.\n");
	out.print  ("However, quite often an encoding is specified in terms of\n");
	out.print  ("a character code (and the implied\n");
	out.print  ("character repertoire).\n");
	out.print  ("The <EM>logical</EM> structure is still the following:\n");
	out.print  ("</P>\n");
	out.print  ("<OL>\n");
	out.print  ("<LI>A character <em>repertoire</em>\n");
	out.print  ("     specifies a collection of characters, such as\n");
	out.print  ("     \"a\", \"!\", and \"ä\".</LI>\n");
	out.print  ("<LI>A character <em>code</em>\n");
	out.print  ("     defines numeric codes for characters\n");
	out.print  ("     in a repertoire. For example, in the\n");
	out.print  ("<A HREF=\"#10646\">ISO 10646</A>\n");
	out.print  ("     character code the numeric codes for\n");
	out.print  ("     \"a\", \"!\", \"ä\", and \"&#8240;\" (per mille sign) are\n");
	out.print  ("97, 33, 228, and 8240. (Note: Especially the per mille sign,\n");
	out.print  ("     presenting <small><sup>0</sup>/<sub>00</sub></small> as a single character, can be \n");
	out.print  ("shown\n");
	out.print  ("     incorrectly on display or on paper. That would be an illustration\n");
	out.print  ("     of the symptoms of the problems we are discussing.)\n");
	out.print  ("     </LI>\n");
	out.print  ("<LI>A character <em>encoding</em>\n");
	out.print  ("     defines how sequences of numeric\n");
	out.print  ("     codes are presented as (i.e., mapped to)\n");
	out.print  ("     sequences of octets.\n");
	out.print  ("In one possible encoding for\n");
	out.print  ("<A HREF=\"#10646\">ISO 10646</A>,\n");
	out.print  ("the string a!ä&#8240; is presented as the following sequence of\n");
	out.print  ("     octets (using two octets for each character):\n");
	out.print  ("     0, 97, 0, 33, 0, 228, 32, 48.\n");
	out.print  ("     </LI>\n");
	out.print  ("</OL>\n");
	out.print  ("<p>For a more rigorous explanation of these basic concepts, see\n");
	out.print  ("<cite><a href=\"http://www.unicode.org/unicode/reports/tr17/\"\n");
	out.print  (">Unicode Technical Report&nbsp;#17:\n");
	out.print  ("Character Encoding Model</a></cite>.</p>\n");
	out.print  ("<P>\n");
	out.print  ("The phrase\n");
	out.print  ("<dfn>character set</dfn> is used in a variety of meanings.\n");
	out.print  ("It might denotes just a character repertoire\n");
	out.print  ("but it may also\n");
	out.print  ("refer to a character code, and quite often\n");
	out.print  ("a particular character encoding is implied too.\n");
	out.print  ("</P>\n");
	out.print  ("<p>Unfortunately the word <dfn>charset</dfn> is used to refer to\n");
	out.print  ("an encoding, causing much confusion.\n");
	out.print  ("It is even the official term to be used in several contexts by\n");
	out.print  ("Internet protocols, in <a href=\"#MIME\">MIME</a> headers.</p>\n");
	out.print  ("<P>\n");
	out.print  ("Quite often the choice of a character repertoire, code, or\n");
	out.print  ("encoding is presented as the choice of a <EM>language</EM>.\n");
	out.print  ("For example, Web browsers typically confuse things quite a lot in this area.\n");
	out.print  ("A&nbsp;pulldown menu in a program might be labeled \"Languages\", yet\n");
	out.print  ("consist of character encoding choices (only).\n");
	out.print  ("A language setting is quite distinct from character issues,\n");
	out.print  ("although naturally each language has its own requirements on\n");
	out.print  ("character repertoire.\n");
	out.print  ("Even more seriously, programs and their documentation very often\n");
	out.print  ("confuse the above-mentioned issues with the selection of a\n");
	out.print  ("<a href=\"#font\">font</a>.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<h2><a name=\"examples\">Examples of character codes</a></h2>\n");
	out.print  ("\n");
	out.print  ("<h3><A NAME=\"ascii\">Good old ASCII</A></h3>\n");
	out.print  ("\n");
	out.print  ("<h4>The basics of ASCII</h4>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("The name <dfn>ASCII</dfn>, originally an abbreviation for\n");
	out.print  ("\"American Standard Code for Information Interchange\",\n");
	out.print  ("denotes\n");
	out.print  ("an old character\n");
	out.print  ("<a href=\"#repertoire\">repertoire</a>,\n");
	out.print  ("<a href=\"#code\">code</a>, and\n");
	out.print  ("<a href=\"#encoding\">encoding</a>.</p>\n");
	out.print  ("<p class=\"summary\">Most character codes currently\n");
	out.print  ("in use contain ASCII as  their subset\n");
	out.print  ("in some sense.\n");
	out.print  ("ASCII is the safest character repertoire to be used\n");
	out.print  ("in data transfer. However,\n");
	out.print  ("<a href=\"#safe-ascii\"\n");
	out.print  ("title=\"Subsets of ASCII for safety\">\n");
	out.print  ("not even all ASCII characters are \"safe\"!</a></p>\n");
	out.print  ("<p>\n");
	out.print  ("ASCII has been used and is used so widely that often the word <i>ASCII</i>\n");
	out.print  ("refers to \"text\" or \"plain text\" in general, even if the\n");
	out.print  ("character code is something else!\n");
	out.print  ("The words \"ASCII file\" quite often mean any text file as opposite\n");
	out.print  ("to a binary file.\n");
	out.print  ("</p>\n");
	out.print  ("<P>\n");
	out.print  ("The definition of ASCII also specifies a set of\n");
	out.print  ("<a href=\"#control\">control codes</a>\n");
	out.print  ("(\"control characters\") such as linefeed (LF) and escape (ESC). But\n");
	out.print  ("the <EM>character repertoire</EM> proper,\n");
	out.print  ("consisting of the\n");
	out.print  ("<em>printable</em> characters of ASCII, is the following\n");
	out.print  ("(where the first item is the blank, or space,\n");
	out.print  ("character):\n");
	out.print  ("</P>\n");
	out.print  ("<PRE>\n");
	out.print  ("  ! \" # $ % &amp; ' ( ) * + , - . /\n");
	out.print  ("0 1 2 3 4 5 6 7 8 9 : ; &lt; = &gt; ?\n");
	out.print  ("@ A B C D E F G H I J K L M N O\n");
	out.print  ("P Q R S T U V W X Y Z [ \\ ] ^ _\n");
	out.print  ("` a b c d e f g h i j k l m n o\n");
	out.print  ("p q r s t u v w x y z { | } ~ \n");
	out.print  ("</PRE>\n");
	out.print  ("\n");
	out.print  ("<p>The <em>appearance</em> of characters varies, of course,\n");
	out.print  ("especially for some special characters. Some of the variation \n");
	out.print  ("and other details are explained in\n");
	out.print  ("<cite><a href=\"latin1/index.html\">The ISO Latin 1 character repertoire -\n");
	out.print  ("a description with usage notes</a></cite>.</p>\n");
	out.print  ("\n");
	out.print  ("<h4>A formal view on ASCII</h4>\n");
	out.print  ("<P>\n");
	out.print  ("The <EM>character code</EM> defined by the ASCII standard\n");
	out.print  ("is the following:\n");
	out.print  ("code values are assigned to characters consecutively\n");
	out.print  ("in the order in which the characters are listed above\n");
	out.print  ("(rowwise),\n");
	out.print  ("starting from 32 (assigned to the blank)\n");
	out.print  ("and ending up with 126 (assigned to the tilde\n");
	out.print  ("character <CODE>~</CODE>).\n");
	out.print  ("Positions 0 through 31 and 127 are reserved for\n");
	out.print  ("<a href=\"#control\">control codes</a>. They have standardized\n");
	out.print  ("<a href=\"chars/c0.html\"\n");
	out.print  ("title=\n");
	out.print  ("\"Ascii control codes (control characters, C0 controls)\" \n");
	out.print  (">names and descriptions</a>,\n");
	out.print  ("but in fact their usage varies a lot.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("The <EM>character encoding</EM> specified by the ASCII\n");
	out.print  ("standard is very simple,\n");
	out.print  ("and the most obvious one for any character code where the code\n");
	out.print  ("numbers do not exceed 255:\n");
	out.print  ("each code number is presented\n");
	out.print  ("as an octet with the same value.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Octets 128 - 255 are not used in ASCII.\n");
	out.print  ("(This allows programs to use\n");
	out.print  ("the first, most significant bit of an octet\n");
	out.print  ("as a <a href=\n");
	out.print  ("\"http://webopedia.internet.com/TERM/p/parity.html\"\n");
	out.print  (">parity</a> bit, for example.)\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"national-ascii\">National variants of ASCII</a></h4>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("There are several\n");
	out.print  ("national variants of ASCII.\n");
	out.print  ("In such variants, some special characters have been\n");
	out.print  ("replaced by national letters (and other symbols).\n");
	out.print  ("There is great variation here, and even within one country and\n");
	out.print  ("for one language there might be different variants.\n");
	out.print  ("The original ASCII is therefore often referred to as\n");
	out.print  ("<dfn>US-ASCII</dfn>; the formal standard (by\n");
	out.print  ("<a href=\"http://www.ansi.org/\"\n");
	out.print  ("title=\"American National Standards Institute\"\n");
	out.print  (">ANSI</a>) is\n");
	out.print  ("<cite>ANSI X3.4-1986</cite>.</p>\n");
	out.print  ("<p><small>The phrase \"original ASCII\" is\n");
	out.print  ("perhaps not quite adequate, since the creation of ASCII started\n");
	out.print  ("in late 1950s, and several additions and modifications were made in\n");
	out.print  ("the 1960s. The <a href=\n");
	out.print  ("\"http://www.wps.com/projects/codes/index.html#ASCII-1963\"\n");
	out.print  (">1963 version</a> had several unassigned code positions.\n");
	out.print  ("The ANSI standard, where those positions were assigned, mainly to\n");
	out.print  ("accommodate\n");
	out.print  ("lower case letters,\n");
	out.print  ("was approved in 1967/1968, later\n");
	out.print  ("modified slightly.\n");
	out.print  ("For the early history, including pre-ASCII\n");
	out.print  ("character codes, see\n");
	out.print  ("Steven J. Searle's\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"http://tronweb.super-nova.co.jp/characcodehist.html\"\n");
	out.print  (">A Brief History of Character Codes\n");
	out.print  ("     in\n");
	out.print  ("     North America, Europe, and East Asia</a></cite> and\n");
	out.print  ("Tom Jennings'\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"http://www.wps.com/projects/codes/index.html\"\n");
	out.print  (">ASCII: American Standard Code for Information Infiltration</a></cite>.\n");
	out.print  ("See also <a href=\"http://www.jimprice.com/\">Jim Price</a>'s\n");
	out.print  ("<cite><a href=\"http://jimprice.com/jim-asc.htm\">ASCII Chart</a></cite>,\n");
	out.print  ("Mary Brandel's\n");
	out.print  ("\n");
	out.print  ("<cite><a href=\"http://web.archive.org/web/20041012084602/http://www.bobbemer.com/brandela.htm\"\n");
	out.print  (">1963: ASCII Debuts</a></cite>,\n");
	out.print  ("and the <a href=\"http://web.archive.org/web/20041012084602/http://www.bobbemer.com/HISTORY.HTM\"\n");
	out.print  (">computer history documents</a>, including\n");
	out.print  ("the background and creation of ASCII,\n");
	out.print  ("written by\n");
	out.print  ("<a href=\"http://web.archive.org/web/20041012084602/http://www.bobbemer.com/\">Bob Bemer</a>, \"father of ASCII\".</small></P>\n");
	out.print  ("<P>\n");
	out.print  ("<A NAME=\"646\">The international standard</A>\n");
	out.print  ("<A TITLE=\"Basic facts and references about ISO 646\"\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/chars.html#ISO646\"\n");
	out.print  ("><CITE>ISO 646</CITE></A>\n");
	out.print  ("defines a character set similar to\n");
	out.print  ("<a href=\"#ascii\">US-ASCII</a> but\n");
	out.print  ("with <A HREF=\"#code\">code positions</A>\n");
	out.print  ("corresponding to US-ASCII characters\n");
	out.print  ("<tt>@[\\]{|}</tt>\n");
	out.print  ("as \"national use positions\".\n");
	out.print  ("It also gives some liberties with characters\n");
	out.print  ("<tt>#$^`~</tt>.\n");
	out.print  ("The standard also defines\n");
	out.print  ("\"international reference version (IRV)\", which is\n");
	out.print  ("(in the 1991 edition of ISO 646) identical to US-ASCII.\n");
	out.print  ("<a href=\"http://www.ecma-international.org/\"\n");
	out.print  (">Ecma International</a> has issued the\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.ecma-international.org/publications/standards/Ecma-006.htm\"\n");
	out.print  ("title=\"7-Bit Coded Character Set (ECMA-6 standard)\"\n");
	out.print  ("class=\"nobr\"\n");
	out.print  (">ECMA-6</a> standard, which is equivalent to ISO&nbsp;646\n");
	out.print  ("and is freely available on the Web.</p>\n");
	out.print  ("<P>\n");
	out.print  ("Within the framework of ISO 646, and partly otherwise too,\n");
	out.print  ("several \"national variants of ASCII\" have been defined,\n");
	out.print  ("assigning different letters and symbols to the\n");
	out.print  ("\"national use\" positions.\n");
	out.print  ("Thus, the characters that appear in those positions&nbsp;- including\n");
	out.print  ("those in US-ASCII&nbsp;-\n");
	out.print  ("are somewhat \"unsafe\" in international data\n");
	out.print  ("transfer, although this problem is losing significance. The trend is\n");
	out.print  ("towards using the corresponding codes strictly for US-ASCII meanings;\n");
	out.print  ("national characters are handled otherwise, giving them their own,\n");
	out.print  ("unique and universal code positions in \n");
	out.print  ("character codes larger than ASCII.\n");
	out.print  ("But old software and devices\n");
	out.print  ("may still reflect various \"national variants of ASCII\".\n");
	out.print  ("<P>\n");
	out.print  ("The following table\n");
	out.print  ("lists ASCII characters which might\n");
	out.print  ("be replaced by other characters in national\n");
	out.print  ("variants of ASCII. (That is, the code positions of these\n");
	out.print  ("US-ASCII characters might be occupied by other characters needed\n");
	out.print  ("for national use.) The lists of characters appearing\n");
	out.print  ("in national variants are not intended to be exhaustive, just\n");
	out.print  ("typical <em>examples</em>.\n");
	out.print  ("</P>\n");
	out.print  ("<TABLE BORDER=\"1\" class=\"normal\">\n");
	out.print  ("<THEAD>\n");
	out.print  ("<TR ALIGN=LEFT><TH>dec <TH>oct <TH>hex <TH><A TITLE=\n");
	out.print  ("\"General description of the glyph concept\"\n");
	out.print  (" HREF=\"#glyph\">glyph</A> <TH>official\n");
	out.print  ("<A HREF=\"#10646\">Unicode</A> name\n");
	out.print  ("<TH>National variants</TH></TR>\n");
	out.print  ("</THEAD>\n");
	out.print  ("<TBODY>\n");
	out.print  ("<TR><TH>\n");
	out.print  ("<TR><TD align=right> 35 <TD align=right> 43 <TD align=right>23  <TH>#  <TD><A HREF=\"latin1/3.html#23\"><SPAN CLASS=\"charname\">number sign</SPAN></A>\n");
	out.print  ("<TD>£ Ù\n");
	out.print  ("<TR><TD align=right> 36 <TD align=right> 44 <TD align=right>24  <TH>$  <TD><A HREF=\"latin1/3.html#24\"><SPAN CLASS=\"charname\">dollar sign</SPAN></A>\n");
	out.print  ("<TD>¤\n");
	out.print  ("<TR><TD align=right> 64 <TD align=right>100 <TD align=right>40  <TH>@  <TD><A HREF=\"latin1/3.html#40\"><SPAN CLASS=\"charname\">commercial at</SPAN></A>\n");
	out.print  ("<TD>É § Ä à ³\n");
	out.print  ("<TR><TD align=right> 91 <TD align=right>133 <TD align=right>5B  <TH>[  <TD><A HREF=\"latin1/3.html#5B\"><SPAN CLASS=\"charname\">left square bracket</SPAN></A>\n");
	out.print  ("<TD>Ä Æ ° â ¡ ÿ é\n");
	out.print  ("<TR><TD align=right> 92 <TD align=right>134 <TD align=right>5C  <TH>\\  <TD><A HREF=\"latin1/3.html#5C\"><SPAN CLASS=\"charname\">reverse solidus</SPAN></A>\n");
	out.print  ("<TD>Ö Ø ç Ñ ½ ¥\n");
	out.print  ("<TR><TD align=right> 93 <TD align=right>135 <TD align=right>5D  <TH>]  <TD><A HREF=\"latin1/3.html#5D\"><SPAN CLASS=\"charname\">right square bracket</SPAN></A>\n");
	out.print  ("<TD>Å Ü § ê é ¿ |\n");
	out.print  ("<TR><TD align=right> 94 <TD align=right>136 <TD align=right>5E  <TH>^  <TD><A HREF=\"latin1/3.html#5E\"><SPAN CLASS=\"charname\">circumflex accent</SPAN></A>\n");
	out.print  ("<TD>Ü  î \n");
	out.print  ("<TR><TD align=right> 95 <TD align=right>137 <TD align=right>5F  <TH>_  <TD><A HREF=\"latin1/3.html#5F\"><SPAN CLASS=\"charname\">low line</SPAN></A>\n");
	out.print  ("<TD>è\n");
	out.print  ("<TR><TD align=right> 96 <TD align=right>140 <TD align=right>60  <TH>`  <TD><A HREF=\"latin1/3.html#60\"><SPAN CLASS=\"charname\">grave accent</SPAN></A>\n");
	out.print  ("<TD>é ä µ ô ù\n");
	out.print  ("<TR><TD align=right>123 <TD align=right>173 <TD align=right>7B  <TH>{  <TD><A HREF=\"latin1/3.html#7B\"><SPAN CLASS=\"charname\">left curly bracket</SPAN></A>\n");
	out.print  ("<TD>ä æ é à ° ¨\n");
	out.print  ("<TR><TD align=right>124 <TD align=right>174 <TD align=right>7C  <TH>|  <TD><A HREF=\"latin1/3.html#7C\"><SPAN CLASS=\"charname\">vertical line</SPAN></A>\n");
	out.print  ("<TD>ö ø ù ò ñ <I>f</I>\n");
	out.print  ("<TR><TD align=right>125 <TD align=right>175 <TD align=right>7D  <TH>}  <TD><A HREF=\"latin1/3.html#7D\"><SPAN CLASS=\"charname\">right curly bracket</SPAN></A>\n");
	out.print  ("<TD>å ü è ç ¼\n");
	out.print  ("<TR><TD align=right>126 <TD align=right>176 <TD align=right>7E  <TH>~  <TD><A HREF=\"latin1/3.html#7E\"><SPAN CLASS=\"charname\">tilde</SPAN></A>\n");
	out.print  ("<TD>ü ¯ ß ¨ û ì ´ _\n");
	out.print  ("</TBODY>\n");
	out.print  ("</TABLE>\n");
	out.print  ("\n");
	out.print  ("<P>Almost all of the characters used in the national variants have\n");
	out.print  ("been incorporated into <a href=\"#latin1\">ISO Latin 1</a>.\n");
	out.print  ("Systems that support ISO Latin 1 in principle\n");
	out.print  ("may still reflect the use of national variants of ASCII in some details;\n");
	out.print  ("for example, an ASCII character might get <em>printed</em> or\n");
	out.print  ("<em>displayed</em> according to some national variant.\n");
	out.print  ("Thus, even\n");
	out.print  ("\"plain ASCII text\" is thereby not always portable from one\n");
	out.print  ("system or application to another.</P>\n");
	out.print  ("\n");
	out.print  ("<P>More information about national variants and their impact:\n");
	out.print  ("<UL>\n");
	out.print  ("  <li> <a href=\n");
	out.print  ("       \"http://www.terena.nl/library/multiling/euroml/JWvanWingen.html\"\n");
	out.print  ("       >Johan van Wingen</a>:\n");
	out.print  ("<cite><a href=\n");
	out.print  ("       \"http://www.terena.nl/library/multiling/euroml/section04.html\"\n");
	out.print  ("       >International standardization of 7-bit codes, ISO 646</a></cite>;\n");
	out.print  ("       contains a comparison table of national variants\n");
	out.print  ("<LI><a\n");
	out.print  (" href=\n");
	out.print  ("\"http://www.alanflavell.org.uk//iso8859/digress.html#national\"\n");
	out.print  ("     ><cite>Digression on national 7-bit codes</cite></a>\n");
	out.print  ("by\n");
	out.print  ("<A HREF=\"http://www.alanflavell.org.uk//\"\n");
	out.print  ("       >Alan&nbsp;J.&nbsp;Flavell</a>\n");
	out.print  ("<li> The <a href=\"http://czyborra.com/charsets/iso646.html\"\n");
	out.print  ("     >ISO 646 page</a> by\n");
	out.print  ("Roman Czyborra\n");
	out.print  ("<li> <a href=\"http://www.kudpc.kyoto-u.ac.jp/%7eyasuoka/CJK.html\"\n");
	out.print  ("     >Character tables</a> by\n");
	out.print  ("     <a href=\"http://www.kudpc.kyoto-u.ac.jp/%7eyasuoka/index.html\"\n");
	out.print  ("     >Koichi Yasuoka</a>.\n");
	out.print  ("</UL>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"safe-ascii\">Subsets of ASCII for safety</a></h4>\n");
	out.print  ("\n");
	out.print  ("<p>Mainly due to the <a href=\"#national-ascii\">\"national variants\"</a>\n");
	out.print  ("discussed above, some characters are less \"safe\" than others,\n");
	out.print  ("i.e. more often transferred or interpreted incorrectly.</p>\n");
	out.print  ("\n");
	out.print  ("<p>In addition to the letters of the English alphabet\n");
	out.print  ("(\"A\"&nbsp;to&nbsp;\"Z\", and \"a\"&nbsp;to&nbsp;\"z\"), the digits\n");
	out.print  ("(\"0\"&nbsp;to&nbsp;\"9\") and\n");
	out.print  ("the space (\"&nbsp;\"), only the following characters can be\n");
	out.print  ("regarded as really \"safe\" in data transmission:\n");
	out.print  ("<pre>! \" % &amp; ' ( ) * + , - . / : ; &lt; = &gt; ?</pre>\n");
	out.print  ("\n");
	out.print  ("<p>Even these characters might eventually be <em>interpreted</em>\n");
	out.print  ("wrongly by the recipient, e.g. by a human reader seeing \n");
	out.print  ("a <a href=\"#glyph\">glyph</a> for \"&\" as something else than\n");
	out.print  ("what it is intended to denote, or by a program interpreting\n");
	out.print  ("\"&lt;\" as starting some special\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=markup\"\n");
	out.print  ("title=\"What is markup (a definition)\"\n");
	out.print  (">markup</a>,\n");
	out.print  ("\"?\" as being a so-called\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://webopedia.internet.com/TERM/w/wildcard_character.html\"\n");
	out.print  ("     >wildcard</a> character,\n");
	out.print  ("etc.</p>\n");
	out.print  ("\n");
	out.print  ("<p>When you need to <em>name</em> things\n");
	out.print  ("(e.g. files, variables, data fields, etc.), it is often best\n");
	out.print  ("to use only the characters listed above, even if a wider character\n");
	out.print  ("repertoire is possible. Naturally you need to take into account any\n");
	out.print  ("additional restrictions imposed by the applicable syntax.\n");
	out.print  ("For example, the rules of a programming language might restrict\n");
	out.print  ("the character repertoire in identifier names to\n");
	out.print  ("letters, digits and one or two other characters.</p>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"ascii8\">The misnomer \"8-bit ASCII\"</a></h4>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("Sometimes the phrase \"8-bit ASCII\" is used.\n");
	out.print  ("It follows from the discussion above that in reality\n");
	out.print  ("<em>ASCII is strictly\n");
	out.print  ("and unambiguously a 7-bit code</em> in the sense that all code positions\n");
	out.print  ("are in the range 0&nbsp;-&nbsp;127.\n");
	out.print  ("</p><p>\n");
	out.print  ("It is a misnomer\n");
	out.print  ("used to refer to <EM>various</EM> character\n");
	out.print  ("<a href=\"#code\">codes</a>\n");
	out.print  ("which are <dfn>extensions of <A HREF=\"#ascii\">ASCII</A></dfn> in\n");
	out.print  ("the following sense:\n");
	out.print  ("the character repertoire\n");
	out.print  ("contains ASCII as a subset, the code numbers are in the\n");
	out.print  ("range 0 - 255, and the code numbers of ASCII characters equal\n");
	out.print  ("their ASCII codes.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<h3><A NAME=\"latin1\">Another example: ISO Latin&nbsp;1 alias ISO 8859-1</A></h3>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("The ISO 8859-1 standard (which is part of the\n");
	out.print  ("<a href=\"#8859\">ISO 8859 family</a> of standards)\n");
	out.print  ("defines a\n");
	out.print  ("<em>character <a href=\"#repertoire\">repertoire</a></em> identified as\n");
	out.print  ("\"Latin alphabet No. 1\", commonly called \"ISO Latin&nbsp;1\",\n");
	out.print  ("as well as a\n");
	out.print  ("<em>character <a href=\"#code\">code</a></em> for it.\n");
	out.print  ("The repertoire contains the <A HREF=\"#ascii\">ASCII</A> repertoire as a subset,\n");
	out.print  ("and the code numbers for those characters are the same as in ASCII.\n");
	out.print  ("The standard also specifies an\n");
	out.print  ("<em><a href=\"#encoding\">encoding</a></em>, which is similar to\n");
	out.print  ("that of ASCII: each code number is\n");
	out.print  ("presented simply as one octet.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("In addition to the ASCII characters, ISO Latin&nbsp;1 contains\n");
	out.print  ("various accented characters and other letters needed for writing\n");
	out.print  ("languages of Western Europe, and some special characters.\n");
	out.print  ("These characters occupy code positions 160 - 255, and they are:\n");
	out.print  ("</P>\n");
	out.print  ("<PRE>\n");
	out.print  ("  ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯\n");
	out.print  ("° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿\n");
	out.print  ("À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï\n");
	out.print  ("Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß\n");
	out.print  ("à á â ã ä å æ ç è é ê ë ì í î ï\n");
	out.print  ("ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ\n");
	out.print  ("</PRE>\n");
	out.print  ("<P>Notes:</P>\n");
	out.print  ("<UL class=\"emb\">\n");
	out.print  ("<LI>The first of the characters above appears as space;\n");
	out.print  ("it is the so-called\n");
	out.print  ("<A TITLE=\"A description of the NO-BREAK SPACE character\"\n");
	out.print  ("HREF=\"latin1/3.html#A0\">no-break space</A>.</LI>\n");
	out.print  ("<LI>\n");
	out.print  ("The presentation of some characters in copies of this\n");
	out.print  ("document may be defective e.g. due to lack of\n");
	out.print  ("     <a href=\"#font\">font</a> support.\n");
	out.print  ("You may wish to compare the presentation of the characters on\n");
	out.print  ("your browser with the\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://czyborra.com/charsets/iso8859-1.gif\"\n");
	out.print  ("     >character table presented as a GIF image</A>\n");
	out.print  ("in the famous\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://czyborra.com/charsets/iso8859.html\">\n");
	out.print  ("<CITE>ISO 8859 Alphabet Soup</CITE></A> document.\n");
	out.print  ("(In text only mode, you may wish to use my simple\n");
	out.print  ("     <a href=\"ISO-table.html\">table of ISO Latin 1</a>\n");
	out.print  ("     which contains the names of the characters.)\n");
	out.print  ("</LI>\n");
	out.print  ("<LI>     Naturally, the appearance of characters varies from one\n");
	out.print  ("     <a href=\"#font\">font</a> to another.\n");
	out.print  ("</LI>\n");
	out.print  ("</UL>\n");
	out.print  ("\n");
	out.print  ("<P>See also: <a href=\"latin1/index.html\"><cite>The ISO Latin 1 character repertoire -\n");
	out.print  ("a description with usage notes</cite></a>, which presents detailed\n");
	out.print  ("characterizations of the <em>meanings</em> of the characters\n");
	out.print  ("and comments on their usage in various contexts.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<h3><A NAME=\"win\">More examples: the Windows character <nobr>set(s)</nobr></A></h3>\n");
	out.print  ("<P>\n");
	out.print  ("In\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>,\n");
	out.print  ("code positions\n");
	out.print  ("128 - 159 are explicitly reserved for\n");
	out.print  ("<a href=\"#control\">control purposes</a>;\n");
	out.print  ("they \"correspond to bit combinations that do not represent\n");
	out.print  ("graphic characters\".\n");
	out.print  ("The so-called\n");
	out.print  ("<!--\"http://voyager.cns.ohiou.edu/%7esadkins/web_library/fonts/font_specs/5-3ch.html\"-->\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://www.microsoft.com/globaldev/reference/sbcs/1252.htm\"\n");
	out.print  ("title=\"Microsoft Windows Codepage : 1252 (Latin I)\" \n");
	out.print  (">Windows character set</A>\n");
	out.print  ("(WinLatin1, or\n");
	out.print  ("<A TITLE=\"cp1252_WinLatin1 to Unicode table\"\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT\"\n");
	out.print  (">Windows code page 1252</A>,\n");
	out.print  ("to be exact)\n");
	out.print  ("uses some of those positions for printable characters. Thus,\n");
	out.print  ("the Windows character set\n");
	out.print  ("<strong>is\n");
	out.print  ("not identical with\n");
	out.print  ("<A HREF=\"#latin1\">ISO 8859-1</A></strong>.\n");
	out.print  ("It is, however, true that the Windows character set is much more similar\n");
	out.print  ("to ISO 8859-1 than the so-called\n");
	out.print  ("<A HREF=\"#cp\">DOS character sets</A>\n");
	out.print  ("are. The Windows character set is often called\n");
	out.print  ("\"ANSI character set\", but this is seriously misleading.\n");
	out.print  ("It has <em>not</em> been approved by\n");
	out.print  ("<a href=\"http://www.ansi.org/\"\n");
	out.print  ("title=\"American National Standards Institute\"\n");
	out.print  (">ANSI</a>. (Historical background: Microsoft based the design\n");
	out.print  ("of the set on a <em>draft</em> for an ANSI standard.\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.microsoft.com/globaldev/reference/glossary.mspx\"\n");
	out.print  (">A glossary</a> by Microsoft explicitly\n");
	out.print  ("admits this.)</p>\n");
	out.print  ("\n");
	out.print  ("<p><small>Note that programs used on Windows systems may use a DOS\n");
	out.print  ("character set; for example, if you create a text file using a Windows\n");
	out.print  ("program and then use the <code>type</code> command on DOS prompt\n");
	out.print  ("to see its content,\n");
	out.print  ("strange things may happen, since the DOS command interprets the data\n");
	out.print  ("according to a DOS character code.</small></P>\n");
	out.print  ("<P>\n");
	out.print  ("In the Windows character set, some positions in the range 128 - 159 are\n");
	out.print  ("assigned to printable characters, such as\n");
	out.print  ("\"smart quotes\",\n");
	out.print  ("em dash, en dash,\n");
	out.print  ("and trademark symbol.\n");
	out.print  ("Thus, the character repertoire is larger than\n");
	out.print  ("<a href=\"#latin1\">ISO Latin&nbsp;1</a>.\n");
	out.print  ("The use of octets in the range 128&nbsp;- 159 in any data to\n");
	out.print  ("be processed by a program that expects ISO 8859-1 encoded data\n");
	out.print  ("is an error which might cause just anything.\n");
	out.print  ("They might for example get ignored,\n");
	out.print  ("or be processed in a manner which looks meaningful,\n");
	out.print  ("or be interpreted as <a href=\"#control\">control characters</a>.\n");
	out.print  ("See my document\n");
	out.print  ("<a href=\"www/windows-chars.html\">\n");
	out.print  ("<cite>On the use of some MS Windows characters in HTML</cite></a>\n");
	out.print  ("for a discussion of the problems of using these characters.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("The Windows character set exists in\n");
	out.print  ("different variations, or <strong>\"code pages\"</strong> (CP),\n");
	out.print  ("which generally differ from the corresponding ISO 8859 standard so\n");
	out.print  ("that it contains same characters in positions 128&nbsp;- 159 as\n");
	out.print  ("code page 1252. (However, there are some more\n");
	out.print  ("<a href=\"unicode/greek.html\">differences between ISO 8859-7 and win-1253\n");
	out.print  ("(WinGreek)</a>.)\n");
	out.print  ("See\n");
	out.print  ("<a href=\"http://czyborra.com/charsets/codepages.html\"><cite>Code page &amp;Co.</cite></a>\n");
	out.print  ("by\n");
	out.print  ("Roman Czyborra and\n");
	out.print  ("<a href=\"http://www.microsoft.com/globaldev/reference/WinCP.asp\"\n");
	out.print  ("><cite>Windows codepages</cite></a>\n");
	out.print  ("by <a href=\"http://www.microsoft.com/\">Microsoft</a>.\n");
	out.print  ("See also\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/\"\n");
	out.print  ("title=\"Windows code page to Unicode mapping tables\"\n");
	out.print  (">CP to Unicode mappings</a>.\n");
	out.print  ("What we\n");
	out.print  ("have discussed here is the most usual one, resembling ISO 8859-1.\n");
	out.print  ("Its status in the\n");
	out.print  ("<A TITLE=\"Information about charset registration procedure\"\n");
	out.print  ("HREF=\"#charsetreg\">officially IANA registry</A> was unclear;\n");
	out.print  ("an encoding had been registered under the name\n");
	out.print  ("<CODE>ISO-8859-1-Windows-3.1-Latin-1</CODE> by Hewlett-Packard&nbsp;(!),\n");
	out.print  ("assumably intending to refer to WinLatin1, but in 1999-12\n");
	out.print  ("<a href=\"http://www.isi.edu/in-notes/iana/assignments/character-set-info/windows-1252\"\n");
	out.print  (">Microsoft finally registered</a> it under the name <CODE>windows-1252</CODE>.\n");
	out.print  ("That name has in fact been widely used for it. (The name <code>cp-1252</code>\n");
	out.print  ("has been used too, but it isn't officially registered even as an alias name.)</P>\n");
	out.print  ("\n");
	out.print  ("<h3><A NAME=\"asciiext\">The ISO 8859 family</A></h3>\n");
	out.print  ("<P>\n");
	out.print  ("There are several character codes which are extensions to <A HREF=\"#ascii\">ASCII</A>\n");
	out.print  ("in the same <a href=\"#ascii8\"\n");
	out.print  ("title=\"What does &quot;extension to ASCII&quot; mean?\"\n");
	out.print  (">sense</a> as\n");
	out.print  ("<A HREF=\"#latin1\">ISO 8859-1</A>\n");
	out.print  ("and the\n");
	out.print  ("<A HREF=\"#win\">Windows character set</A>.\n");
	out.print  ("<P>\n");
	out.print  ("<A NAME=\"8859\">ISO 8859-1 itself is just a member of the</A>\n");
	out.print  ("ISO 8859 family of character codes, which is nicely overviewed in\n");
	out.print  ("Roman Czyborra's famous\n");
	out.print  ("document\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://czyborra.com/charsets/iso8859.html\"><cite>The ISO 8859 Alphabet\n");
	out.print  ("Soup</cite></a>.\n");
	out.print  ("The ISO 8859 codes extend the\n");
	out.print  ("<a href=\"#ascii\">ASCII</a>\n");
	out.print  ("repertoire in different ways\n");
	out.print  ("with different special characters (used in different languages\n");
	out.print  ("and cultures).\n");
	out.print  ("Just as ISO 8859-1 contains ASCII characters and a collection\n");
	out.print  ("of characters needed in languages of western (and northern) Europe,\n");
	out.print  ("there is ISO 8859-2 alias ISO Latin 2 constructed similarly\n");
	out.print  ("for languages of central/eastern Europe, etc.\n");
	out.print  ("The ISO 8859 character codes are <em>isomorphic</em>\n");
	out.print  ("in the following sense: code positions 0 - 127 contain the same\n");
	out.print  ("character as in ASCII, positions 128 - 159 are unused\n");
	out.print  ("(reserved for <a href=\"#control\">control\n");
	out.print  ("characters</a>), and positions 160 - 255 are\n");
	out.print  ("the varying part, used differently in different members of the\n");
	out.print  ("ISO 8859 family.\n");
	out.print  ("</P><P>\n");
	out.print  ("The ISO 8859 character codes are normally presented using\n");
	out.print  ("the obvious encoding: each code position is presented as one octet.\n");
	out.print  ("Such encodings have several alternative names in\n");
	out.print  ("the official <a href=\"#charsetreg\">registry of character encodings</a>,\n");
	out.print  ("but the preferred ones are of the form <nobr>ISO-8859-<var>n</var>.</nobr>\n");
	out.print  ("</P><P>\n");
	out.print  ("Although ISO 8859-1 has been a de facto default\n");
	out.print  ("encoding in many contexts, it has in principle no special role.\n");
	out.print  ("<a href=\"latin9.html\"\n");
	out.print  ("title=\"ISO Latin 9 as compared with ISO Latin 1\" \n");
	out.print  (">ISO 8859-15 alias ISO Latin 9 (!)</a>\n");
	out.print  ("was expected to\n");
	out.print  ("replace ISO 8859-1 to a great extent, since it contains the\n");
	out.print  ("politically important symbol for\n");
	out.print  ("<a href=\"http://europa.eu.int/euro/html/entry.html\"\n");
	out.print  ("title=\"Official EU information about euro (multilingual site)\"\n");
	out.print  (">euro</a>, but it seems to have little practical use.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("The following table lists the ISO 8859 alphabets, with links\n");
	out.print  ("to more detailed descriptions. There is a separate document\n");
	out.print  ("<A HREF=\"8859.html\">\n");
	out.print  ("<CITE>Coverage of European languages by ISO Latin alphabets</CITE></A>\n");
	out.print  ("which you might use to determine which (if any) of the alphabets are\n");
	out.print  ("suitable for a document in a given language or combination of\n");
	out.print  ("languages. My\n");
	out.print  ("<a href=\"iso8859/\">other material on ISO 8859</a> contains\n");
	out.print  ("a combined character table, too.\n");
	out.print  ("</P>\n");
	out.print  ("<TABLE BORDER=\"1\" cellpadding=\"4\" cellspacing=\"0\">\n");
	out.print  ("<CAPTION>The parts of ISO 8859<BR></CAPTION>\n");
	out.print  ("<TR><TH>standard </TH><TH>name of alphabet\n");
	out.print  ("</TH><TH>characterization<BR></TH></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-1\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\"\n");
	out.print  (">ISO&nbsp;8859-1</A> </TD><TD>\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"latin1/index.html\"\n");
	out.print  ("TITLE=\n");
	out.print  ("\"The ISO Latin 1 character repertoire - a description with usage notes\">\n");
	out.print  ("Latin alphabet No. 1</A>\n");
	out.print  ("</TD><TD>\n");
	out.print  ("\"Western\", \"West European\"<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-2\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-2</A> </TD><TD>Latin alphabet No. 2\n");
	out.print  ("</TD><TD>\"Central European\", \"East European\"<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-3\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-3</A> </TD><TD>Latin alphabet No. 3\n");
	out.print  ("</TD><TD>\"South European\"; \"Maltese &amp; Esperanto\"<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-4\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-4</A> </TD><TD>Latin alphabet No. 4\n");
	out.print  ("</TD><TD>\"North European\"<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-5\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-5</A> </TD><TD>Latin/Cyrillic alphabet\n");
	out.print  ("</TD><TD>(for Slavic languages)<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-6\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-6</A> </TD><TD>Latin/Arabic alphabet\n");
	out.print  ("</TD><TD>(for the Arabic language)<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-7\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-7</A> </TD><TD>Latin/Greek alphabet\n");
	out.print  ("</TD><TD>(for modern Greek)<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-8\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-8</A> </TD><TD>Latin/Hebrew alphabet\n");
	out.print  ("</TD><TD>(for Hebrew and Yiddish)<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-9\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-9</A> </TD><TD>Latin alphabet No. 5\n");
	out.print  ("</TD><TD>\"Turkish\"<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-10\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-10</A> </TD><TD>Latin alphabet No. 6\n");
	out.print  ("</TD><TD>\"Nordic\" (S&aacute;mi, Inuit, Icelandic)<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-11\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-11</A> </TD><TD>Latin/Thai alphabet\n");
	out.print  ("</TD><TD>(for the Thai language)<BR></TD></TR>\n");
	out.print  ("<TR><TD COLSPAN=\"3\">\n");
	out.print  ("<A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-12\"\n");
	out.print  ("TITLE=\"Information about this gap\"><SMALL>(Part\n");
	out.print  ("12 has not been defined.)</SMALL></A><BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-13\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-13</A> </TD><TD>Latin alphabet No. 7\n");
	out.print  ("</TD><TD>Baltic Rim<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-14\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-14</A> </TD><TD>Latin alphabet No. 8\n");
	out.print  ("</TD><TD>Celtic<BR></TD></TR>\n");
	out.print  ("<TR><TD><A HREF=\"http://czyborra.com/charsets/iso8859.html#ISO-8859-15\"\n");
	out.print  ("TITLE=\"Short description with code table (upper half)\">ISO&nbsp;8859-15</A>\n");
	out.print  ("</TD><TD><A HREF=\"latin9.html\"\n");
	out.print  ("TITLE=\"ISO Latin 9 as compared with ISO Latin 1\">\n");
	out.print  ("Latin alphabet No. 9</A>\n");
	out.print  ("</TD><TD>\"euro\"<BR></TD></TR>\n");
	out.print  ("<TR><TD>\n");
	out.print  ("ISO&nbsp;8859-16\n");
	out.print  ("</TD><TD>\n");
	out.print  ("Latin alphabet No. 10\n");
	out.print  ("</TD><TD>\n");
	out.print  ("for South-Eastern Europe (see below)\n");
	out.print  ("<BR></TD></TR>\n");
	out.print  ("</TABLE>\n");
	out.print  ("<P><SMALL>Notes:\n");
	out.print  ("ISO 8859-<VAR>n</VAR> is Latin alphabet <abbr title=\"number\">no.</abbr>\n");
	out.print  ("<VAR>n</VAR> for <VAR>n</VAR>=1,2,3,4, but\n");
	out.print  ("this correspondence is broken for the other Latin alphabets.\n");
	out.print  ("<strong><a name=\"8859-16\"\n");
	out.print  ("href=\"http://www.evertype.com/standards/iso8859/fdis8859-16-en.pdf\"\n");
	out.print  ("title=\"Committee Draft for Latin alphabet No. 10 (PDF format)\"\n");
	out.print  (">ISO 8859-16</a></strong> is for use in\n");
	out.print  ("Albanian, Croatian, English, Finnish, French,\n");
	out.print  ("German, Hungarian, Irish Gaelic (new orthography),\n");
	out.print  ("Italian, Latin, Polish, Romanian, and\n");
	out.print  ("Slovenian. In particular, it contains letters s and t with\n");
	out.print  ("comma below, in order to address\n");
	out.print  ("<a href=\"8859.html#IX\">an issue of writing Romanian</a>.\n");
	out.print  ("See the\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://anubis.dkuug.dk/jtc1/sc2/\"\n");
	out.print  (">ISO/IEC JTC 1/ SC 2</a> site for the current status and\n");
	out.print  ("proposed changes to the ISO 8859 set of standards.</small></p>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"other-extensions-to-ascii\">Other \"extensions to ASCII\"</a></h3>\n");
	out.print  ("\n");
	out.print  ("<P>In addition to the codes discussed above, \n");
	out.print  ("there are other extensions to ASCII\n");
	out.print  ("which utilize the\n");
	out.print  ("code range 0&nbsp;-&nbsp;255\n");
	out.print  ("(<A TITLE='Explanation of the misnomer \"8-bit ASCII\"'\n");
	out.print  (" HREF=\"#ascii8\">\"8-bit ASCII codes\"</A>), such as\n");
	out.print  ("</P>\n");
	out.print  ("<DL>\n");
	out.print  ("  <DT> \n");
	out.print  ("  <A NAME=\"cp\"><strong>DOS</strong> character codes</A>, or \"code pages\" (CP)\n");
	out.print  ("  </DT><DD> \n");
	out.print  ("  In\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?MS-DOS\"\n");
	out.print  ("       >MS DOS</a>\n");
	out.print  ("       systems, different character codes are used; they are\n");
	out.print  ("  called \"code pages\". The original American code page was CP 437,\n");
	out.print  ("  which has e.g. some Greek letters, mathematical symbols, and\n");
	out.print  ("  characters which can be used as elements in simple pseudo-graphics.\n");
	out.print  ("  Later CP 850 became popular, since it contains letters needed\n");
	out.print  ("  for West European languages - largely the same letters as\n");
	out.print  ("  <A HREF=\"#latin1\">ISO 8859-1</A>, but in different code\n");
	out.print  ("  positions.\n");
	out.print  ("See <a href=\n");
	out.print  ("\"http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/\"\n");
	out.print  (">DOS code page to Unicode mapping tables</a> for detailed information.\n");
	out.print  ("Note that DOS code pages are quite different from\n");
	out.print  ("       <a href=\"#win\">Windows character codes</a>, though the latter\n");
	out.print  ("are sometimes called with names like <code>cp-1252</code>\n");
	out.print  ("       (=&nbsp;<code>windows-1252</code>)!\n");
	out.print  ("For further confusion,\n");
	out.print  ("Microsoft now prefers to use the notion \"OEM code page\"\n");
	out.print  ("for the DOS character set used in a particular country.\n");
	out.print  ("  </DD>\n");
	out.print  ("  <DT> \n");
	out.print  ("  <A NAME=\"maccode\"><strong>Macintosh</strong> character code</A>\n");
	out.print  ("  </DT><DD> \n");
	out.print  ("  On the\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=macintosh&amp;action=Search\"\n");
	out.print  ("       >Macs</a>,\n");
	out.print  ("       the character code is more uniform than on PCs\n");
	out.print  ("(although there are some\n");
	out.print  ("<A TITLE=\"Mac OS Encoding Variants\" HREF=\n");
	out.print  ("\"http://developer.apple.com/techpubs/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.b1.html\"\n");
	out.print  ("       >national variants</A>).       \n");
	out.print  ("  The Mac character repertoire is a mixed combination of ASCII,\n");
	out.print  ("  accented letters, mathematical symbols, and other ingredients.\n");
	out.print  ("See section\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://developer.apple.com/techpubs/mac/Text/Text-2.html\">\n");
	out.print  ("<cite>Text</cite></A> in\n");
	out.print  ("<!--\"http://gemma.apple.com/dev/techsupport/insidemac/\"-->\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://developer.apple.com/techpubs/macos8/mac8.html\">\n");
	out.print  ("<cite>Mac OS 8 and 9 Developer Documentation</cite></A>.\n");
	out.print  ("  </DD>\n");
	out.print  ("</DL>\n");
	out.print  ("<P>\n");
	out.print  ("Notice that\n");
	out.print  ("many of these\n");
	out.print  ("are very different from ISO 8859-1.\n");
	out.print  ("They may have different character repertoires, and the same\n");
	out.print  ("character often has different code values in different codes.\n");
	out.print  ("For example, code position\n");
	out.print  ("228 is occupied\n");
	out.print  ("by ä (letter a with dieresis, or umlaut) in ISO 8859-1,\n");
	out.print  ("by &#240; (Icelandic letter eth) in HP's\n");
	out.print  ("<!--\"http://hpcc920.external.hp.com/cposupport/printers/support_doc/bpl02461.html\"-->\n");
	out.print  ("<A TITLE=\"Roman 8 Character Conversion Table\" HREF=\n");
	out.print  ("\"http://www.robelle.com/library/smugbook/roman8.html\"\n");
	out.print  (">Roman-8</A>,\n");
	out.print  ("by &otilde; (letter o with tilde) in\n");
	out.print  ("DOS code page 850, and\n");
	out.print  ("per mille sign&nbsp;(&#8240;)\n");
	out.print  ("in Macintosh character code.\n");
	out.print  ("</P>\n");
	out.print  ("<P>For information about several code pages, see\n");
	out.print  ("<a href=\"http://czyborra.com/charsets/codepages.html\"><cite>Code page &amp;Co.</cite></a>\n");
	out.print  ("by\n");
	out.print  ("Roman Czyborra.\n");
	out.print  ("See also his excellent\n");
	out.print  ("<a href=\"http://czyborra.com/charsets/cyrillic.html\"\n");
	out.print  ("title=\"The Cyrillic Charset Soup\"\n");
	out.print  (">description of various <strong>Cyrillic</strong> encodings</a>, such as different\n");
	out.print  ("variants of KOI-8; most of them are extensions to ASCII, too.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("In general, full\n");
	out.print  ("<strong>conversions</strong> between the character codes\n");
	out.print  ("mentioned above are not possible.\n");
	out.print  ("For example, the Macintosh character repertoire\n");
	out.print  ("contains the Greek letter pi, which does not exist in\n");
	out.print  ("<a href=\"#latin1\">ISO Latin&nbsp;1</a>\n");
	out.print  ("at all. Naturally, a text can be\n");
	out.print  ("converted\n");
	out.print  ("(by a simple program which uses a conversion table)\n");
	out.print  ("from Macintosh character\n");
	out.print  ("code to ISO 8859-1 if the text contains only those characters which\n");
	out.print  ("belong to the ISO Latin&nbsp;1 character repertoire.\n");
	out.print  ("Text presented in\n");
	out.print  ("<a href=\"#win\">Windows character code</a>\n");
	out.print  ("can be used as such\n");
	out.print  ("as ISO 8859-1 encoded data <em>if</em> it contains only\n");
	out.print  ("those characters which belong to the ISO Latin&nbsp;1 character repertoire.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"8bit\">Other \"8-bit codes\"</a></h3>\n");
	out.print  ("\n");
	out.print  ("<p>All the character codes discussed above are \"8-bit codes\",\n");
	out.print  ("eight bits are sufficient for presenting the\n");
	out.print  ("<a href=\"#code\">code numbers</a>\n");
	out.print  ("and in practice the\n");
	out.print  ("<a href=\"#encoding\">encoding</a> (at least the normal encoding)\n");
	out.print  ("is the obvious (trivial) one where each code position (thereby,\n");
	out.print  ("each character) is presented as one octet (byte).\n");
	out.print  ("This means that there are 256 code positions, but several\n");
	out.print  ("positions are reserved for <a href=\"#control\">control codes</a> or\n");
	out.print  ("left unused (unassigned, undefined).\n");
	out.print  ("</p> \n");
	out.print  ("\n");
	out.print  ("<p>Although currently most \"8-bit codes\" are\n");
	out.print  ("<a href=\"#ascii8\">extensions to ASCII</a>\n");
	out.print  ("in the sense described above, this is just a practical matter\n");
	out.print  ("caused by the widespread use of <a href=\"#ascii\">ASCII</a>.\n");
	out.print  ("It was practical to make the \"lower halves\" of the character codes the\n");
	out.print  ("same, for several reasons.</p>\n");
	out.print  ("<p><a name=\"2022\">The standards</a>\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/oii/en/chars.html#ISO2022\"\n");
	out.print  (">ISO 2022</a>\n");
	out.print  ("<!--title=\"8-bit Character Sets - ISO/IEC 2022\"\n");
	out.print  ("\"http://www.stri.is/TC304/GUIDE/gis2022.htm\"-->\n");
	out.print  ("and\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/chars.html#ISO4873\">ISO 4873</a> define a\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://developer.apple.com/techpubs/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.a3.html\"\n");
	out.print  ("title=\"General Character Set Structure\"\n");
	out.print  (">general framework</a> for 8-bit codes (and 7-bit codes)\n");
	out.print  ("and for switching between them.\n");
	out.print  ("One of the basic ideas is that code positions 128&nbsp;-&nbsp;159 (decimal)\n");
	out.print  ("are reserved for use as <a href=\"#control\">control codes</a>\n");
	out.print  ("(\"C1&nbsp;controls\"). Note that\n");
	out.print  ("the <a href=\"#win\">Windows</a> character sets do not comply with this principle.</p>\n");
	out.print  ("<p>\n");
	out.print  ("<a name=\"ebcdic\">To illustrate that other kinds of\n");
	out.print  ("8-bit codes can be defined\n");
	out.print  ("than extensions to Ascii,\n");
	out.print  ("we briefly consider the</a>\n");
	out.print  ("<strong><a href=\"http://webopedia.internet.com/TERM/E/EBCDIC.html\"\n");
	out.print  ("title=\"EBCDIC (a short description)\"\n");
	out.print  (">EBCDIC</A></strong> code, defined by\n");
	out.print  ("<a href=\"http://www.ibm.com/\">IBM</a> and once in widespread use\n");
	out.print  ("on \"<a href=\n");
	out.print  ("\"http://webopedia.internet.com/TERM/m/mainframe.html\"\n");
	out.print  (">mainframes</a>\"\n");
	out.print  ("(and still in use).\n");
	out.print  ("EBCDIC contains all ASCII characters but in\n");
	out.print  ("quite different <a href=\"#code\">code positions</a>.\n");
	out.print  ("As an interesting detail, in EBCDIC normal letters A&nbsp;-&nbsp;Z do not\n");
	out.print  ("all appear in consecutive code positions.\n");
	out.print  ("EBCDIC exists in different national variants\n");
	out.print  ("(cf. to <a href=\"#646\">variants of ASCII</a>).\n");
	out.print  ("For more information on EBCDIC, see section\n");
	out.print  ("     <a href=\"http://www.terena.nl/library/multiling/euroml/section05.html\"\n");
	out.print  ("     ><cite>IBM and EBCDIC</cite></a> in\n");
	out.print  (" <a href=\"http://www.terena.nl/library/multiling/euroml/JWvanWingen.html\">Johan W. van Wingen</a>'s\n");
	out.print  ("     <a href=\"http://www.terena.nl/library/multiling/euroml/mlcs5.html\"\n");
	out.print  ("     ><cite>Character sets. Letters, tokens and codes.</cite></a>.\n");
	out.print  ("</p>\n");
	out.print  ("\n");
	out.print  ("<h3><A NAME=\"10646\">ISO 10646, UCS, and Unicode</A></h3>\n");
	out.print  ("\n");
	out.print  ("<h4>ISO 10646, the standard</h4>\n");
	out.print  ("<P>\n");
	out.print  ("<dfn>ISO 10646</dfn>\n");
	out.print  ("(officially: ISO/IEC 10646) \n");
	out.print  ("is an international standard,\n");
	out.print  ("by\n");
	out.print  ("<A TITLE=\"International Organization for Standardization\"\n");
	out.print  (" HREF=\"http://www.iso.org/\">ISO</A> and\n");
	out.print  ("<a href=\"http://www.iec.ch\" title=\n");
	out.print  ("\"International Electrotechnical Commission\" \n");
	out.print  (">IEC</a>.\n");
	out.print  ("It defines UCS, Universal Character Set, which is\n");
	out.print  ("a very large and growing\n");
	out.print  ("<a href=\"#repertoire\">character repertoire</a>,\n");
	out.print  ("and a\n");
	out.print  ("<a href=\"#code\">character code</a>\n");
	out.print  ("for it.\n");
	out.print  ("Currently tens of thousands of characters have been defined,\n");
	out.print  ("and new amendments are defined fairly often.\n");
	out.print  ("<!-- see\n");
	out.print  ("a href=\n");
	out.print  ("\"http://www.iso.ch/isob/switch-engine-cate.pl?searchtype=refnumber&amp;KEYWORDS=10646\"\n");
	out.print  ("information about ISO 10646 standard documents /a at ISO.-->\n");
	out.print  ("It contains, among other things, all characters in the character\n");
	out.print  ("repertoires discussed above.\n");
	out.print  ("For a list of the character blocks in the repertoire, with\n");
	out.print  ("examples of some of them, see the document\n");
	out.print  ("<a href=\"ucs.html8\"><cite>UCS (ISO 10646, Unicode) character blocks</cite></a>.\n");
	out.print  ("</P>\n");
	out.print  ("<p><small>The number of the standard intentionally reminds us of\n");
	out.print  ("646, the number of the ISO standard corresponding to\n");
	out.print  ("<a href=\"#ascii\">ASCII</a>.</small></p>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"unicode\">Unicode, the more practical definition of UCS</a></h4>\n");
	out.print  ("<P>\n");
	out.print  ("<dfn>Unicode</dfn>\n");
	out.print  ("is a\n");
	out.print  ("<a title=\n");
	out.print  ("\"Information about Unicode standard\"\n");
	out.print  ("href=\"http://www.unicode.org/unicode/standard/standard.html\">standard</a>,\n");
	out.print  ("by the\n");
	out.print  ("<a href=\"http://www.unicode.org/\">Unicode Consortium</a>,\n");
	out.print  ("which defines a character repertoire and character code\n");
	out.print  ("intended to be fully compatible with ISO 10646,\n");
	out.print  ("and an encoding for it.\n");
	out.print  ("ISO 10646 is more general (abstract) in nature, whereas\n");
	out.print  ("Unicode \"imposes\n");
	out.print  ("additional constraints on implementations to ensure that\n");
	out.print  ("they treat characters uniformly across platforms and applications\",\n");
	out.print  ("as they say in section\n");
	out.print  ("<cite><a href=\"http://www.unicode.org/unicode/faq/unicode_iso.html\"\n");
	out.print  (">Unicode&nbsp;&amp;&nbsp;ISO&nbsp;10646</a></cite>\n");
	out.print  ("of\n");
	out.print  ("the\n");
	out.print  ("<cite><a\n");
	out.print  ("href=\"http://www.unicode.org/unicode/faq/\">Unicode FAQ</a></cite>.</p>\n");
	out.print  ("<p>Unicode was originally designed to be a 16-bit code, but it\n");
	out.print  ("was extended so that currently\n");
	out.print  ("code positions are expressed as integers\n");
	out.print  ("in the hexadecimal range 0..10FFFF\n");
	out.print  ("(decimal 0..1&nbsp;114&nbsp;111).\n");
	out.print  ("That space is divided into 16-bit\n");
	out.print  ("<!--a href=\"http://www.nada.kth.se/i18n/ucs/unicode-iso10646-oview.html#2\"\n");
	out.print  ("title=\"The structure of the coding space\"-->\n");
	out.print  ("\"planes\".\n");
	out.print  ("Until recently, the use of Unicode has mostly been limited to \n");
	out.print  ("\"Basic Multilingual Plane (BMP)\" consisting of the range\n");
	out.print  ("0..FFFF.</p> \n");
	out.print  ("\n");
	out.print  ("<p>The ISO 10646 and Unicode\n");
	out.print  ("<EM>character repertoire</EM> can be regarded as a\n");
	out.print  ("<EM>superset</EM> of\n");
	out.print  ("most character repertoires in use.\n");
	out.print  ("However, the <EM>code positions</EM> of characters vary from\n");
	out.print  ("one character code to another.</p>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"unicode-name\">\"Unicode\" is the commonly used name</a></h4>\n");
	out.print  ("\n");
	out.print  ("<p>In practice, people usually talk about Unicode rather than ISO 10646,\n");
	out.print  ("partly because we prefer names to numbers,\n");
	out.print  ("partly because Unicode is more explicit about the\n");
	out.print  ("<em>meanings</em> of characters, partly because\n");
	out.print  ("detailed information about Unicode\n");
	out.print  ("is available on the Web (see below).</p>\n");
	out.print  ("<p><small>Unicode version 1.0 used somewhat different\n");
	out.print  ("<a href=\"#names\">names</a> for some characters\n");
	out.print  ("than ISO 10646. In Unicode version, 2.0, the names were\n");
	out.print  ("made the same as in ISO 10646.\n");
	out.print  ("New <a href=\"http://www.unicode.org/unicode/standard/versions/\"\n");
	out.print  (">versions</a> of Unicode are expected to add new characters mostly.\n");
	out.print  ("<a href=\"http://www.unicode.org/unicode/standard/versions/Unicode3.0.html\"\n");
	out.print  (">Version&nbsp;3.0</a>, with a total number of\n");
	out.print  ("49,194 characters (38,887 in version&nbsp;2.1),\n");
	out.print  ("was published in February 2000, and version 4.0 has\n");
	out.print  ("96,248 characters.</small></p>\n");
	out.print  ("<P><small>Until recently, the ISO 10646 standard had not\n");
	out.print  ("been put onto the Web.\n");
	out.print  ("It is now available as a large (80 megabytes) zipped PDF file via the\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://isotc.iso.ch/livelink/livelink/fetch/2000/2489/Ittf_Home/PubliclyAvailableStandards.htm\"\n");
	out.print  (">Publicly Available Standards</a> page of ISO/IEC JTC1.\n");
	out.print  ("page.\n");
	out.print  ("It is available in printed form\n");
	out.print  ("from <a href=\"http://www.iso.ch/addresse/membodies.html\"\n");
	out.print  ("title=\"ISO members (contact info)\"\n");
	out.print  (">ISO member bodies</a>. But for most practical purposes,\n");
	out.print  ("the same information is in the Unicode standard.</small></p>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"unicode-info\">General information about ISO 10646 and Unicode</a></h4>\n");
	out.print  ("<p>For more information, see\n");
	out.print  ("<ul>\n");
	out.print  ("  <li> <a href=\"http://www.unicode.org/unicode/faq/\">Unicode FAQ</a> by\n");
	out.print  ("       the Unicode Consortium. It is fairly large but\n");
	out.print  ("divided into sections rather logically, except that section\n");
	out.print  ("<cite><a href=\"http://www.unicode.org/unicode/faq/basic_q.html\">\n");
	out.print  ("Basic Questions</a></cite> would be better labeled as \"Miscellaneous\".\n");
	out.print  (" <li> Roman Czyborra's\n");
	out.print  ("material on Unicode, such as\n");
	out.print  ("<a href=\"http://czyborra.com/unicode/motivation.html\"\n");
	out.print  ("><cite>Why do we need Unicode?</cite></a> and\n");
	out.print  ("<a href=\"http://czyborra.com/unicode/characters.html\" title=\n");
	out.print  ("\"A look at Unicode as a coded character set: Unicode's character repertoire and character numbering\"\n");
	out.print  ("       ><cite>Unicode's characters</cite></a>       \n");
	out.print  ("  <li> Olle Järnefors:\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://www.nada.kth.se/i18n/ucs/unicode-iso10646-oview.html\">\n");
	out.print  ("<CITE>A short overview of\n");
	out.print  ("ISO/IEC 10646 and Unicode</CITE></A>.\n");
	out.print  ("Very readable and informative, though somewhat outdated e.g.\n");
	out.print  ("as regards to\n");
	out.print  ("<a href=\"http://www.unicode.org/unicode/standard/versions/\"\n");
	out.print  (">versions of Unicode</a>.\n");
	out.print  ("(It also contains a more\n");
	out.print  ("detailed technical description of the UTF encodings than those\n");
	out.print  ("given above.)\n");
	out.print  ("  <li> <a href=\"http://www.cl.cam.ac.uk/~mgk25/\">Markus Kuhn</a>:\n");
	out.print  ("<a href=\"http://www.cl.cam.ac.uk/~mgk25/unicode.html\"\n");
	out.print  ("       ><cite>UTF-8 and Unicode FAQ for Unix/Linux</cite></a>. \n");
	out.print  ("Contains helpful general explanations as well as practical\n");
	out.print  ("implementation considerations.\n");
	out.print  ("<li> Steven J. Searle: <cite><a href=\n");
	out.print  ("\"http://tronweb.super-nova.co.jp/characcodehist.html\"\n");
	out.print  (">A Brief History of Character Codes\n");
	out.print  ("     in\n");
	out.print  ("     North America, Europe, and East Asia</a></cite>.\n");
	out.print  ("Contains a valuable historical review, including critical notes\n");
	out.print  ("on the \"unification\" of Chinese, Japanese and Korean (CJK)\n");
	out.print  ("characters.\n");
	out.print  ("<li> <a href=\n");
	out.print  ("\"http://www.alanwood.net/alan-wood.html\"\n");
	out.print  (">Alan Wood</a>:\n");
	out.print  ("<cite><a href=\"http://www.alanwood.net/unicode/utilities_editors.html\">Unicode and Multilingual Editors and Word Processors</a></cite>; some\n");
	out.print  ("     software tools for actually writing Unicode; I'd especially\n");
	out.print  ("     recommend taking a look at the free\n");
	out.print  ("<a href=\"http://www.sharmahd.com/unipad/unipad.en.html\">UniPad</a> editor (for Windows).\n");
	out.print  ("</ul>\n");
	out.print  ("\n");
	out.print  ("<div>There are also some <em>books</em> on Unicode:</div>\n");
	out.print  ("<UL>\n");
	out.print  ("<li>Jukka K. Korpela:\n");
	out.print  ("<a href=\"http://www.oreilly.com/catalog/unicode/\" \n");
	out.print  ("lang=\"en\">Unicode Explained</a>. O&#8217;Reilly, 2006.\n");
	out.print  ("<li>Tony Graham: <a href=\n");
	out.print  ("\"http://www.adwarereport.com/books-plain/0764546252.html\"\n");
	out.print  ("title=\"Adware Report : Unicode: A Primer\">\n");
	out.print  ("Unicode: A Primer</a>. Wiley, 2000.\n");
	out.print  ("<!-- Available for viewing online\n");
	out.print  ("page by page:\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.amazon.com/gp/reader/0764546252/ref=sib_dp_pt/104-3004196-5994368#reader-link\">Viewing &quot;Unicode: A Primer</a>. -->\n");
	out.print  ("<li>Richard Gillam: <a href=\n");
	out.print  ("\"http://www.amazon.com/exec/obidos/tg/detail/-/0201700522/qid=1111476373/sr=8-2/ref=pd_csp_2/104-3004196-5994368?v=glance&amp;s=books&amp;n=507846\"\n");
	out.print  ("title=\"Info on &quot;Unicode Demystified...&quot; at Amazon\"\n");
	out.print  (">Unicode Demystified: A Practical Programmer's Guide to the Encoding Standard</a>.\n");
	out.print  ("Addison-Wesley, 2002.\n");
	out.print  ("</UL>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"unicode-ref\">Reference information about ISO 10646 and Unicode</a></h4>\n");
	out.print  ("\n");
	out.print  ("<UL>\n");
	out.print  ("<li> <strong><a href=\n");
	out.print  ("\"http://www.unicode.org/versions/Unicode4.0.0/\"\n");
	out.print  ("     >Unicode 4.0 online</a></strong>: the standard itself, mostly\n");
	out.print  ("     in PDF format;\n");
	out.print  ("it's partly hard to read, so you might benefit from my <cite><a href=\"unicode/guide.html\">\n");
	out.print  ("Guide to the Unicode standard</a></cite>, which\n");
	out.print  ("      briefly explains the structure of\n");
	out.print  ("      the standard and how to find information about a particular character there\n");
	out.print  ("<li><a href=\"http://hapax.qc.ca/\"\n");
	out.print  ("lang=\"fr\" hreflang=\"fr\">Unicode et ISO 10646 en français</a>, \n");
	out.print  ("the Unicode standard in French\n");
	out.print  ("<LI> <strong><a href=\"http://www.unicode.org/charts/\" name=\"charts\"\n");
	out.print  (">Unicode charts</a></strong>, containing\n");
	out.print  ("     <a href=\"#name\">names</a>,\n");
	out.print  ("     <a href=\"#code\">code positions</a>,\n");
	out.print  (" and representative <a href=\"#glyph\">glyphs</a>\n");
	out.print  ("for the characters and notes on their usage.\n");
	out.print  ("Available in PDF format, containing\n");
	out.print  ("the same information as in the corresponding parts\n");
	out.print  ("	  of the printed standard.\n");
	out.print  ("<small>(The charts were previously available in faster-access format too,\n");
	out.print  ("as HTML documents containing\n");
	out.print  ("the charts as GIF images. But this version seems to have been\n");
	out.print  (" removed.)</small> \n");
	out.print  ("\n");
	out.print  ("<li> <a name=\"unidata\" href=\n");
	out.print  ("\"http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\"\n");
	out.print  ("     >Unicode database</a>, a large\n");
	out.print  ("     (over 460&nbsp;000 <a href=\"#octet\"\n");
	out.print  ("title=\"octet = byte = 8 bits\"\n");
	out.print  ("     >octets</a>) plain text file listing\n");
	out.print  ("     Unicode character <a href=\"#code\">code positions</a>,\n");
	out.print  ("     <a href=\"#names\">names</a>, and defined\n");
	out.print  ("character <a href=\"#prop\">properties</a> in a compact\n");
	out.print  ("     <a href=\n");
	out.print  ("\"http://www.unicode.org/Public/UNIDATA/UnicodeData.html\"\n");
	out.print  ("title=\"UnicodeData file format\"\n");
	out.print  ("     >notation</a>\n");
	out.print  ("<LI><A TITLE=\n");
	out.print  ("\"Title: ISO/IEC 10646-1 annex E as changed by technical corrigendum 1\"\n");
	out.print  ("HREF=\"http://osiris.dkuug.dk/JTC1/SC2/WG2/docs/annexe-r\">Informative\n");
	out.print  ("annex E</A> to ISO 10646-1:1993 (i.e., old version!), which \n");
	out.print  ("lists, in alphabetic order,\n");
	out.print  ("     all character <a href=\"#names\">names</a> (and the\n");
	out.print  ("     <a href=\"#code\">code positions</a>) except\n");
	out.print  ("Hangul and CJK ideographs; useful for finding out\n");
	out.print  ("the code position when you know the (right!) name of a character.\n");
	out.print  ("<li> <a href=\"http://www.eki.ee/letter/\">An online character database</a> by\n");
	out.print  ("Indrek Hein at the\n");
	out.print  ("<a href=\"http://www.eki.ee/\">Institute of the Estonian Language</a>.\n");
	out.print  ("You can e.g. search for\n");
	out.print  ("     <a href=\"#10646\">Unicode</a> characters by name or code position\n");
	out.print  ("and get the Unicode equivalents of characters in many widely used\n");
	out.print  ("     character sets.\n");
	out.print  ("<form action=\"http://www.eki.ee/letter/chardata.cgi\" class=\"noprint\">\n");
	out.print  ("<div>This simple interface to the database lets you retrieve\n");
	out.print  ("     information about a Unicode character by code position\n");
	out.print  ("(to be specified in hexadecimal, with four digits, as in\n");
	out.print  ("<code>U+<var>nnnn</var></code>):\n");
	out.print  ("<code>U+<input name=\"ucode\" value=\"\" size=10></code>\n");
	out.print  ("     <input type=\"submit\">\n");
	out.print  ("     </div></FORM>\n");
	out.print  ("<li><a href=\"html/unicode.html\">How to find an &amp;#number; \n");
	out.print  ("notation for a character</a>; contains some additional information\n");
	out.print  ("on how to find a Unicode number for a character\n");
	out.print  ("     </UL>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"ucs2\">Encodings for Unicode</a></h4>\n");
	out.print  ("<P>Originally, before extending the code range past 16 bits,\n");
	out.print  ("the \"native\" Unicode encoding was <dfn>UCS-2</dfn>, which\n");
	out.print  ("presents each code number as two consecutive octets\n");
	out.print  ("<VAR>m</VAR> and <VAR>n</VAR> so that the number\n");
	out.print  ("equals\n");
	out.print  ("256<VAR>m</VAR>+<VAR>n</VAR>. This means, to express it\n");
	out.print  ("in computer jargon, that the code number is presented as a\n");
	out.print  ("<strong>two-byte integer</strong>. According to the Unicode\n");
	out.print  ("consortium, the term UCS-2 should now be avoided, as it is\n");
	out.print  ("associated with the 16-bit limitations.</p>\n");
	out.print  ("<p>\n");
	out.print  ("<dfn>UTF-32</dfn> encodes each code position as a 32-bit binary\n");
	out.print  ("integer, i.e. as four octets.\n");
	out.print  ("This is a very obvious and simple encoding. However, it is\n");
	out.print  ("inefficient in terms of the number of octets needed. If we have\n");
	out.print  ("normal English text or other text which contains\n");
	out.print  ("<a href=\"#latin1\">ISO Latin&nbsp;1</a>\n");
	out.print  ("characters only,\n");
	out.print  ("the length of the Unicode encoded octet sequence is four times the\n");
	out.print  ("length of the string in ISO 8859-1 encoding. UTF-32 is rarely\n");
	out.print  ("used, except perhaps in internal operations (since it is very\n");
	out.print  ("simple for the purposes of string processing).</P>\n");
	out.print  ("\n");
	out.print  ("<p><dfn>UTF-16</dfn> represents each code position in the\n");
	out.print  ("Basic Multilingual Plane as two octets. Other code positions are\n");
	out.print  ("presented using so-called <dfn>surrogate pairs</dfn>, utilizing some\n");
	out.print  ("code positions in the BMP reserved for the purpose. This, too,\n");
	out.print  ("is a very simple encoding when the data contains BMP characters\n");
	out.print  ("only.</p>\n");
	out.print  ("<!--\n");
	out.print  ("It is somewhat debatable whether Unicode defines an encoding\n");
	out.print  ("or just a character code. However, it refers to code values\n");
	out.print  ("being presentable as 16-bit integers, and it seems to imply\n");
	out.print  ("the corresponding two-octet representation.\n");
	out.print  ("In principle, Unicode requires that \"Unicode values can\n");
	out.print  ("be stored in native 16-bit machine words\" and \"does not\n");
	out.print  ("specify any order of bytes inside a Unicode value\".\n");
	out.print  ("Thus, it allows \"<a href=\n");
	out.print  ("\"http://webopedia.internet.com/TERM/b/big_endian.html\"\n");
	out.print  ("title=\"Big-endian [and little-endian]\"\n");
	out.print  (">little-endian</a>\" presentation where the least\n");
	out.print  ("significant byte precedes the most significant byte, if agreed on\n");
	out.print  ("by higher-level protocols.\n");
	out.print  ("-->\n");
	out.print  ("<P>\n");
	out.print  ("<A NAME=\"utf\">Unicode</a>\n");
	out.print  ("can be, and often is, encoded in other ways, too,\n");
	out.print  ("such as the following encodings:\n");
	out.print  ("</P>\n");
	out.print  ("<DL>\n");
	out.print  ("<DT>\n");
	out.print  ("<A TITLE=\"RFC 3629: UTF-8, a transformation format of ISO 10646\"\n");
	out.print  (" HREF=\n");
	out.print  ("\"http://www.faqs.org/rfcs/rfc3629.html\"\n");
	out.print  ("     >\n");
	out.print  ("UTF-8</A></DT>\n");
	out.print  ("<DD>\n");
	out.print  ("Character codes less than 128\n");
	out.print  ("(effectively, the <A HREF=\"#ascii\">ASCII</A> repertoire) are presented \"as such\",\n");
	out.print  ("using one octet for each code (character)\n");
	out.print  ("All other codes are presented, according to a relatively\n");
	out.print  ("     complicated method, so that one code (character) is\n");
	out.print  ("presented as a sequence of two to four octets, each of which\n");
	out.print  ("     is in the range 128 - 255.  This means that in a sequence of\n");
	out.print  ("octets, octets in the range 0 - 127 (\"bytes with most significant bit\n");
	out.print  ("set to 0\") directly represent <A HREF=\"#ascii\">ASCII</A> characters,\n");
	out.print  ("whereas octets in the range 128 - 255 (\"bytes with most significant\n");
	out.print  ("bit set to 1\") are to be interpreted as really encoded presentations\n");
	out.print  ("of characters.  </DD>\n");
	out.print  ("<DT> <A NAME=\"utf7\" TITLE= \"RFC 2152: UTF-7 - A\n");
	out.print  ("Mail-Safe Transformation Format of Unicode\"\n");
	out.print  ("HREF=\"ftp://nis.nsf.net/internet/documents/rfc/rfc2152.txt\">UTF-7</A> </DT> <DD>\n");
	out.print  ("Each character code is presented as a sequence of one or more octets\n");
	out.print  ("in the range 0 - 127 (\"bytes with most significant bit set to 0\",\n");
	out.print  ("     or \"seven-bit bytes\", hence the name). Most <A HREF=\"#ascii\">ASCII</A> characters\n");
	out.print  ("     are presented as such, each as one octet, but for obvious\n");
	out.print  ("     reasons some octet values must be reserved for use as\n");
	out.print  ("     \"escape\" octets, specifying the octet together with a certain\n");
	out.print  ("     number of subsequent octets forms a multi-octet encoded\n");
	out.print  ("     presentation of one character. There is an\n");
	out.print  ("     <a href=\"#utf7ex\">example of using UTF-7</a> later in this document.\n");
	out.print  ("</DD>\n");
	out.print  ("</DL>\n");
	out.print  ("<P><cite>IETF Policy on Character Sets and Languages</cite>\n");
	out.print  ("(<a href=\"ftp://ftp.isi.edu/in-notes/rfc2277.txt\">RFC 2277</a>)\n");
	out.print  ("clearly <strong>favors UTF-8</strong>. It requires\n");
	out.print  ("support to it in Internet protocols (and doesn't even\n");
	out.print  ("mention UTF-7). Note that UTF-8 is efficient, if the data\n");
	out.print  ("consists dominantly of ASCII characters with just a few\n");
	out.print  ("\"special characters\" in addition to them, and reasonably efficient\n");
	out.print  ("for dominantly ISO Latin&nbsp;1 text.</p>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"support\">Support to Unicode characters</a></h4>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("The <strong>implementation</strong> of Unicode support is a long and\n");
	out.print  ("mostly gradual process. Unicode can be\n");
	out.print  ("supported by programs on any operating systems, although some\n");
	out.print  ("systems may allow much easier implementation than others;\n");
	out.print  ("this mainly depends on whether the system uses Unicode internally\n");
	out.print  ("so that support to Unicode is \"built-in\".\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("<a name=\"mes\">Even in circumstances where Unicode\n");
	out.print  ("is supported\n");
	out.print  ("in principle, the support usually does not cover\n");
	out.print  ("<em>all</em> Unicode characters.</a>\n");
	out.print  ("For example, a <a href=\"#font\">font</a> available may cover\n");
	out.print  ("just some part of Unicode which is practically important in \n");
	out.print  ("some area.\n");
	out.print  ("On the other hand, for data transfer it is essential to know\n");
	out.print  ("which Unicode characters the recipient is able to handle.\n");
	out.print  ("For such reasons, various <strong>subsets</strong> of the\n");
	out.print  ("Unicode character repertoire have been and will be defined.\n");
	out.print  ("For example, the <CITE>Minimum European Subset</CITE>\n");
	out.print  ("specified by\n");
	out.print  ("<A HREF=\"http://www.indigo.ie/egt/standards/mes.html\"\n");
	out.print  ("title=\"Technical contents of ENV 1973:1995\"\n");
	out.print  (">ENV 1973:1995</A>\n");
	out.print  ("was intended to\n");
	out.print  ("provide a first step towards the implementation\n");
	out.print  ("of large character sets in Europe. \n");
	out.print  ("It was replaced by\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.evertype.com/standards/iso10646/pdf/cwa13873.pdf\"\n");
	out.print  ("title=\"Multilingual European Subsets in ISO/IEC 10646-1 (PDF format)\"\n");
	out.print  (">three <cite>Multilingual European Subsets</cite></a>\n");
	out.print  ("(MES-1, MES-2, MES-3, with MES-2 based on the\n");
	out.print  ("<cite>Minimum European Subset</cite>), defined in a\n");
	out.print  ("<!--a href=\"http://www.cenorm.be/cenorm/businessdomains/businessdomains/informationsocietystandardizationsystem/published+cwas/cwa+download+area.asp\"\n");
	out.print  ("hreflang=\"en\" title=\n");
	out.print  ("\"CWA Download area\"\n");
	out.print  ("-->CEN Workshop \n");
	out.print  ("Agreement, namely CWA&nbsp;13873.</p>\n");
	out.print  ("<!-- was: \"http://www.cenorm.be/isss/CWAs/cwalist.htm\" -->\n");
	out.print  ("\n");
	out.print  ("<p><small><a name=\"wgl4\">In addition to international standards, there are company policies\n");
	out.print  ("which define various subsets of the character repertoire.</a>\n");
	out.print  ("A practically important one is\n");
	out.print  ("Microsoft's\n");
	out.print  ("<a href=\"http://www.eu.microsoft.com/typography/otspec/wgl4.htm\"\n");
	out.print  ("title=\"WGL4.0 Character Set\"\n");
	out.print  (">\"Windows Glyph List&nbsp;4\" (WGL4)</a>, or \"PanEuropean\"\n");
	out.print  ("character set, characterized on Microsoft's page\n");
	out.print  ("<cite><a href=\"http://www.microsoft.com/typography/unicode/cscp.htm\"\n");
	out.print  (">Character sets and codepages</a></cite> and excellently listed on page\n");
	out.print  ("<cite><a href=\"http://www.alanwood.net/demos/wgl4.html\"\n");
	out.print  (">Using Special Characters from Windows Glyph List&nbsp;4 (WGL4) in HTML</a></cite>\n");
	out.print  ("by <a href=\"http://www.alanwood.net/demos/index.html\">Alan Wood</a>.</small></P>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"U\">The <strong><CODE>U+</CODE><VAR>nnnn</VAR></strong>\n");
	out.print  ("notation</a></h4>\n");
	out.print  ("<P>\n");
	out.print  ("Unicode characters are often referred to using a\n");
	out.print  ("notation of the\n");
	out.print  ("form\n");
	out.print  ("<strong><CODE>U+</CODE><VAR>nnnn</VAR></strong>\n");
	out.print  ("where\n");
	out.print  ("<VAR>nnnn</VAR> is a four-digit\n");
	out.print  ("<A TITLE=\n");
	out.print  ("'description of hexadecimal notation (in Free On-line Dictionary of Computing)'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=hexadecimal&amp;action=Search\"\n");
	out.print  (">hexadecimal</A>\n");
	out.print  ("notation of the\n");
	out.print  ("code value. For example, <CODE>U+0020</CODE> means the space\n");
	out.print  ("character (with code value 20 in hexadecimal, 32 in decimal).\n");
	out.print  ("Notice that such notations identify a character through its\n");
	out.print  ("Unicode code value, without referring to any particular\n");
	out.print  ("encoding. There are other\n");
	out.print  ("<a href=\"#identify\">ways to mention (identify) a character</a>, too.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<H2><A NAME=\"characters\">More about the character concept</A></H2>\n");
	out.print  ("\n");
	out.print  ("<blockquote class=\"motto\">\n");
	out.print  ("<div>An \"A\" (or any other character) is something like a Platonic entity: it is the idea of an \"A\" and not the \"A\" itself.</div> \n");
	out.print  ("<div class=\"credit\">-- Michael E. Cohen:\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"http://www.humnet.ucla.edu/hcf/news/archive/fall1998/fontspart1.html\"\n");
	out.print  (">Text and Fonts in a Multi-lingual Cross-platform World</a></cite>.\n");
	out.print  ("</div></blockquote>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("The <DFN>character</DFN> concept is very fundamental for the\n");
	out.print  ("issues discussed here but difficult to define exactly. The\n");
	out.print  ("more fundamental concepts we use, the harder it is to give\n");
	out.print  ("good definitions. (How would you define \"life\"? Or \"structure\"?)\n");
	out.print  ("Here we will concentrate on clarifying the character concept\n");
	out.print  ("by indicating what it does <em>not</em> imply.\n");
	out.print  ("</P>\n");
	out.print  ("<h3><a name=\"unicode-view\">The Unicode view</a></h3>\n");
	out.print  ("<P>\n");
	out.print  ("The <A HREF=\"#10646\">Unicode</A> standard\n");
	out.print  ("describes characters as\n");
	out.print  ("\"the smallest components of written language that have semantic\n");
	out.print  ("value\", which is somewhat misleading. A character such as a letter\n");
	out.print  ("can hardly be described as having a meaning (semantic value) in itself.\n");
	out.print  ("Moreover, a character such as\n");
	out.print  ("<a href=\"latin1/3.html#FA\">&uacute;</a>\n");
	out.print  ("(letter u with acute accent), which belongs to Unicode, can\n");
	out.print  ("often be regarded as consisting of smaller components: a letter and\n");
	out.print  ("a <a href=\"#diacritic\">diacritic</a>.\n");
	out.print  ("And in fact the very definition of the character concept in\n");
	out.print  ("Unicode is the following:\n");
	out.print  ("</P>\n");
	out.print  ("<BLOCKQUOTE>\n");
	out.print  ("<DIV>\n");
	out.print  ("<I>abstract character</I>: a unit of information used for the\n");
	out.print  ("organization, control, or representation of textual data.\n");
	out.print  ("</DIV>\n");
	out.print  ("</BLOCKQUOTE>\n");
	out.print  ("<P><small>(In Unicode terminology, \"abstract character\" is a character as\n");
	out.print  ("an element of a character repertoire, whereas \"character\"\n");
	out.print  ("refers to \"coded character representation\", which effectively\n");
	out.print  ("means a code value.\n");
	out.print  ("It would be natural to assume that the opposite of an abstract\n");
	out.print  ("character is a concrete character, as something that actually appears\n");
	out.print  ("in some physical form on paper or screen; but oh no,\n");
	out.print  ("the Unicode concept \"character\" is more concrete than an \"abstract\n");
	out.print  ("character\" only in the sense that it has a fixed code position!\n");
	out.print  ("An actual physical\n");
	out.print  ("form of an abstract character,\n");
	out.print  ("with a specific shape and size, is a <a href=\"#glyph\">glyph</a>.\n");
	out.print  ("Confusing, isn't it?)</small></P>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"control\">Control characters (control codes)</a></h3>\n");
	out.print  ("<p>\n");
	out.print  ("The r&ocirc;le of the so-called\n");
	out.print  ("<dfn>control characters</dfn>\n");
	out.print  ("in character codes\n");
	out.print  ("is somewhat obscure.\n");
	out.print  ("Character codes often contain code positions which are\n");
	out.print  ("not assigned to any visible character but reserved\n");
	out.print  ("for control purposes. For example, in communication\n");
	out.print  ("between a terminal and a computer using the\n");
	out.print  ("<a href=\"#ascii\">ASCII</a> code, the computer could\n");
	out.print  ("regard <a href=\"#octet\">octet</a>&nbsp;3 as a request for\n");
	out.print  ("terminating the currently running process. Some older\n");
	out.print  ("character code standards contain <em>explicit descriptions</em>\n");
	out.print  ("of such conventions whereas newer standards just\n");
	out.print  ("<em>reserve some positions</em> for such usage, to be\n");
	out.print  ("defined in\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.itscj.ipsj.or.jp/ISO-IR/\"\n");
	out.print  ("title=\n");
	out.print  ("\"International register of coded character sets to be used with escape sequences\"\n");
	out.print  (">separate standards or agreements</a>\n");
	out.print  ("such as\n");
	out.print  ("<a href=\"http://www.itscj.ipsj.or.jp/ISO-IR/2-5.htm\"\n");
	out.print  (">\"C0 controls\"</a>\n");
	out.print  ("(tabulated in my document on\n");
	out.print  ("<a href=\"chars/c0.html\">\n");
	out.print  ("ASCII control codes</a>)\n");
	out.print  ("and\n");
	out.print  ("<a href=\"http://www.itscj.ipsj.or.jp/ISO-IR/2-6.htm\"\n");
	out.print  (">\"C1 controls\"</a>, or specifically\n");
	out.print  ("<a href=\"http://www.itscj.ipsj.or.jp/ISO-IR/077.pdf\"\n");
	out.print  ("title=\n");
	out.print  ("\"C1 Control Character Set of ISO 6429 (PDF format)\"\n");
	out.print  (">ISO 6429</a>.\n");
	out.print  ("And although the definition quoted above suggests that\n");
	out.print  ("\"control characters\" might be regarded as characters in\n");
	out.print  ("the Unicode terminology, perhaps it is more natural to\n");
	out.print  ("regard them as\n");
	out.print  ("<dfn><a href=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=control+code\"\n");
	out.print  ("title=\n");
	out.print  ("\"Def. of &quot;control code&quot; in Free On-Line Dictionary of Computing\"\n");
	out.print  (">control codes</a></dfn>.</p>\n");
	out.print  ("<p>Control codes can be used for <strong>device control</strong> such as cursor movement,\n");
	out.print  ("page eject, or changing colors. Quite often they are used in combination with\n");
	out.print  ("codes for graphic characters, so that a device\n");
	out.print  ("<a href=\"http://webopedia.internet.com/TERM/d/driver.html\">driver</a> is expected\n");
	out.print  ("to interpret the combination as a specific command and not display the\n");
	out.print  ("graphic character(s) contained in it. For example, in the classical\n");
	out.print  ("<a href=\"http://www.cs.utk.edu/~shuford/terminal/dec.html\"\n");
	out.print  ("title=\"The DEC VT100 and Its Successors...\" \n");
	out.print  (">VT100 controls</a>, ESC followed by the code corresponding to the letter \"A\"\n");
	out.print  ("or something more complicated\n");
	out.print  ("(depending on mode settings) moves the cursor up.\n");
	out.print  ("To take a different example, the\n");
	out.print  ("<a href=\"http://www.geek-girl.com/emacs/emacs.html\"\n");
	out.print  ("title=\"Emacs reference materials\"\n");
	out.print  (">Emacs</a> editor treats ESC&nbsp;A as a request\n");
	out.print  ("to move to the beginning of a sentence.\n");
	out.print  ("Note that the ESC control code\n");
	out.print  ("is logically distinct from the ESC <em>key</em> in a keyboard,\n");
	out.print  ("and many other things than pressing ESC might cause the ESC control code\n");
	out.print  ("to be sent.\n");
	out.print  ("Also note that phrases like <a href=\"#esc\">\"escape sequences\"</a> are often\n");
	out.print  ("used to refer to things that don't involve ESC at all and operate at a quite\n");
	out.print  ("different level.\n");
	out.print  ("<a href=\"http://web.archive.org/web/20041012084602/http://www.bobbemer.com/\">Bob Bemer</a>, the inventor of ESC,\n");
	out.print  ("has written a \"vignette\" about it:\n");
	out.print  ("<cite><a href=\"http://web.archive.org/web/20041012084602/http://www.bobbemer.com/ESCAPE.HTM\"\n");
	out.print  (">That Powerful ESCAPE <nobr>Character --</nobr>\n");
	out.print  ("Key and Sequences</a></cite>.</p>\n");
	out.print  ("\n");
	out.print  ("<p>One possible form of device control is changing the way a device interprets\n");
	out.print  ("the data (octets) that it receives.\n");
	out.print  ("For example, a control code followed by some data in a specific format might\n");
	out.print  ("be interpreted so that any subsequent octets are to be interpreted according to\n");
	out.print  ("a table identified in some specific way. This is often called \"code page switching\", and\n");
	out.print  ("it means that control codes could be used to\n");
	out.print  ("<strong>change the character encoding</strong>. And it is then more logical\n");
	out.print  ("to consider the control codes and associated data at the level of fundamental\n");
	out.print  ("interpretation of data rather than direct device\n");
	out.print  ("control.\n");
	out.print  ("The international standard\n");
	out.print  ("<a href=\"http://www.stadlar.is/TC304/GUIDE/gis2022.htm\"\n");
	out.print  ("title=\"8-bit Character Sets - ISO/IEC 2022\"\n");
	out.print  (">ISO 2022</a> defines powerful facilities for using different\n");
	out.print  ("8-bit character codes in a document.</p>\n");
	out.print  ("\n");
	out.print  ("<p>Widely used <strong>formatting</strong> control codes include carriage return (CR),\n");
	out.print  ("linefeed (LF), and horizontal tab (HT), which in\n");
	out.print  ("<a href=\"#ascii\">ASCII</a> occupy code positions 13, 10, and&nbsp;9.\n");
	out.print  ("The names (or abbreviations) suggest generic meanings, but the actual\n");
	out.print  ("meanings are defined partly in each character code definition, partly&nbsp;- and\n");
	out.print  ("more importantly&nbsp;- by various other conventions \"above\" the character level.\n");
	out.print  ("The \"formatting\" codes\n");
	out.print  ("might be seen as a special case of device control, in a sense,\n");
	out.print  ("but more naturally, a CR or a LF or a CR&nbsp;LF pair (to mention the most\n");
	out.print  ("common conventions) when used in a text file simply indicates a new line.\n");
	out.print  ("As regards\n");
	out.print  ("control codes used for line structuring, see\n");
	out.print  ("Unicode technical report #13\n");
	out.print  ("<a href=\"http://www.unicode.org/unicode/reports/tr13/\"\n");
	out.print  ("><cite>Unicode Newline Guidelines</cite></a>.\n");
	out.print  ("See also my\n");
	out.print  ("<cite><a href=\"unicode/linebr.html\">Unicode line breaking rules: explanations and criticism</a></cite>.\n");
	out.print  ("The <a href=\"http://webopedia.internet.com/TERM/t/tab_character.html\">HT (TAB) character</a>\n");
	out.print  ("is often used for real \"tabbing\" to some predefined writing position.\n");
	out.print  ("But it is also used e.g. for indicating data boundaries, without any particular\n");
	out.print  ("presentational effect, for example in the widely used\n");
	out.print  ("\"tab separated values\"\n");
	out.print  ("(<a href=\n");
	out.print  ("\"TSV.html\"\n");
	out.print  ("title=\"Tab Separated Values (TSV): a format for tabular data exchange\"\n");
	out.print  (">TSV</a>) data format.</p>\n");
	out.print  ("<p class=\"deem\">\n");
	out.print  ("A control code, or a \"control character\" cannot have\n");
	out.print  ("a graphic presentation (a <a href=\"#glyph\">glyph</a>) in the same way as\n");
	out.print  ("normal characters have.\n");
	out.print  ("However, in <a href=\"#10646\">Unicode</a> there is a separate block\n");
	out.print  ("<cite><a href=\"http://www.unicode.org/charts/PDF/U2400.pdf\"\n");
	out.print  (">Control Pictures</a></cite> which contains characters that can be used\n");
	out.print  ("to indicate the presence of a control code.\n");
	out.print  ("<img src=\"unicode/241B.gif\" alt=\n");
	out.print  ("\"For example, the symbol for escape contains the letters E, S, C in\n");
	out.print  ("a descending sequence.\"\n");
	out.print  ("border=\"1\" align=\"right\">\n");
	out.print  ("They are of course quite distinct\n");
	out.print  ("from the control codes they symbolize&nbsp;-\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=241B\"><code>U+241B</code></a>\n");
	out.print  ("<span class=\"charname\">symbol for escape</span>\n");
	out.print  ("is not the same as\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=001B\"><code>U+001B</code></a>\n");
	out.print  ("<span class=\"charname\">escape</span>!\n");
	out.print  ("On the other hand,  \n");
	out.print  ("a control code might occasionally be displayed, by some programs, in\n");
	out.print  ("a visible form, perhaps describing the control action rather than the code.\n");
	out.print  ("For example, upon receiving octet 3 in the\n");
	out.print  ("example situation above, a program might echo back (onto\n");
	out.print  ("the terminal) <samp>***</samp> or <samp>INTERRUPT</samp>\n");
	out.print  ("or <samp>^C</samp>. All such notations\n");
	out.print  ("are program-specific conventions. Some control codes are\n");
	out.print  ("sometimes <em>named</em> in a manner which seems to bind\n");
	out.print  ("them to characters. In particular, control codes 1, 2, 3, ...\n");
	out.print  ("are often called control-A, control-B, control-C, etc. (or\n");
	out.print  ("CTRL-A or C-A or whatever). This is associated with the\n");
	out.print  ("fact that on many keyboards, control codes can be produced\n");
	out.print  ("(for sending to a computer) using a special key labeled\n");
	out.print  ("\"Control\" or \"Ctrl\" or \"CTR\" or something like that\n");
	out.print  ("together with letter keys A, B, C, ...\n");
	out.print  ("This in turn is related to the fact that the <a href=\"#code\">code\n");
	out.print  ("numbers</a> of characters and control codes have been assigned so that\n");
	out.print  ("the code of \"Control-<var>X</var>\" is obtained from \n");
	out.print  ("the code of the upper case letter <var>X</var> by a simple operation\n");
	out.print  ("(subtracting 64 decimal).\n");
	out.print  ("But such things\n");
	out.print  ("imply no real relationships between letters and control codes.\n");
	out.print  ("The control code 3, or \"Control-C\",  is <em>not</em> a variant\n");
	out.print  ("of letter C at all, and its meaning is not associated with the\n");
	out.print  ("meaning of&nbsp;C. \n");
	out.print  ("</p>\n");
	out.print  ("<table align=\"right\" border=\"1\" cellpadding=\"6\" cellspacing=\"0\"\n");
	out.print  ("style=\"margin:1ex\">\n");
	out.print  ("<caption><small>Example: a letter and different glyphs for it</small></caption>\n");
	out.print  ("<tr><td colspan=\"5\" align=center>\n");
	out.print  ("<span class=\"charname\">latin capital letter z</span> <code>(U+005A)</code>\n");
	out.print  ("<tr align=\"center\">\n");
	out.print  ("<td>Z<td><i>Z</i><td><b>Z</b><td><tt>Z</tt>\n");
	out.print  ("<td><big style=\n");
	out.print  ("'font-family : \"Lucida Handwriting\",  Western, fantasy;'>Z</big>\n");
	out.print  ("</tr>\n");
	out.print  ("</table>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"glyph\">A glyph - a visual appearance</a></h3>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("It is important to distinguish the character concept from the\n");
	out.print  ("glyph concept. A\n");
	out.print  ("<DFN>glyph</DFN> is a presentation of a particular shape\n");
	out.print  ("which a character may have when rendered or displayed.\n");
	out.print  ("For example, the character Z might be presented\n");
	out.print  ("as a boldface <B>Z</B> or as an italic <I>Z</I>, and it\n");
	out.print  ("would still be a presentation of the same character.\n");
	out.print  ("On the other hand, lower-case z is defined to be a separate\n");
	out.print  ("character - which in turn may have different glyph presentations.\n");
	out.print  ("<P>\n");
	out.print  ("This is ultimately a <em>matter of definition</em>: a definition\n");
	out.print  ("of a character repertoire specifies the \"identity\" of characters,\n");
	out.print  ("among other things. One <em>could</em> define a repertoire\n");
	out.print  ("where uppercase Z and lowercase z are just two glyphs for the same\n");
	out.print  ("character. On the other hand, one <em>could</em> define that\n");
	out.print  ("italic <I>Z</I> is a character different from normal Z, not just\n");
	out.print  ("a different glyph for it. In fact, in\n");
	out.print  ("<a href=\"#10646\">Unicode</a> for example there are several\n");
	out.print  ("characters which could be regarded as typographic variants of letters\n");
	out.print  ("only, but for various reasons Unicode defines them as separate\n");
	out.print  ("characters. For example, mathematicians use a variant of letter&nbsp;N\n");
	out.print  ("to denote the set of natural numbers (0, 1, 2, ...), and this\n");
	out.print  ("variant is defined as being a separate character\n");
	out.print  ("(\"double-struck capital&nbsp;N\")\n");
	out.print  ("in Unicode.\n");
	out.print  ("There are some more <a href=\"#identity\">notes on the identity\n");
	out.print  ("of characters</a> below.\n");
	out.print  ("<p>The design of glyphs has several aspects, both practical and esthetic.\n");
	out.print  ("For an interesting review of a major company's description of its\n");
	out.print  ("principles and practices, see\n");
	out.print  ("Microsoft's\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"https://docs.microsoft.com/en-us/typography/develop/character-design-standards/index\"\n");
	out.print  (">Character design standards</a></cite> (in its\n");
	out.print  ("<a href=\n");
	out.print  ("\"https://docs.microsoft.com/en-us/typography/\"\n");
	out.print  ("title=\"Microsoft Typography\"\n");
	out.print  (">typography pages</a>).</p>\n");
	out.print  ("\n");
	out.print  ("<p><small>Some discussions, such as ISO 9541-1 and\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.iso.ch/iso/en/ittf/PubliclyAvailableStandards/C027163e.zip\"\n");
	out.print  ("title=\"An operational model for characters and glyphs\"\n");
	out.print  (">ISO/IEC TR 15285</a>, make a further distinction between\n");
	out.print  ("\"<dfn>glyph image</dfn>\",\n");
	out.print  ("which is an actual appearance of a glyph, and \"glyph\",\n");
	out.print  ("which is a more abstract notion. In such an approach, \"glyph\" is close\n");
	out.print  ("to the concept of \"character\", except that a glyph may present a combination\n");
	out.print  ("of several characters. Thus, in that approach, the abstract characters \"f\"\n");
	out.print  ("and \"i\" might be represented using an abstract glyph that combines the two\n");
	out.print  ("characters into a ligature, which itself might have different physical\n");
	out.print  ("manifestations. Such approaches need to be treated as different from the\n");
	out.print  ("issue of treating ligatures as (compatibility) characters.</small></p>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"names\">What's in a name?</a></h3>\n");
	out.print  ("\n");
	out.print  ("<p>The <dfn>names</dfn> of characters\n");
	out.print  ("are <strong>assigned identifiers</strong>\n");
	out.print  ("rather than definitions.\n");
	out.print  ("Typically the names are selected so that they\n");
	out.print  ("contain only letters A&nbsp;-&nbsp;Z, spaces, and hyphens;\n");
	out.print  ("often uppercase variant is the reference spelling of a character name. \n");
	out.print  ("(See <a href=\n");
	out.print  ("\"http://www.stadlar.is/TC304/guidecharactersets/guideannexb.html#_Toc443308172\"\n");
	out.print  (">naming guidelines of the UCS</a>.)\n");
	out.print  ("<!--was: \"http://www.stri.is/TC304/GUIDE/gucsch06.htm#naming\"-->\n");
	out.print  ("The same character may have different names in different definitions\n");
	out.print  ("of character repertoires.\n");
	out.print  ("Generally the name is intended to <strong>suggest a generic meaning\n");
	out.print  ("and scope</strong> of use.\n");
	out.print  ("But the <a href=\"#10646\">Unicode</a> standard warns\n");
	out.print  ("(mentioning <a href=\"latin1/3.html#2E\"><span class=\"charname\">full stop</span></a>\n");
	out.print  ("as an example of a character with varying usage):\n");
	out.print  ("<blockquote><div>A character may have a broader range of use than the most\n");
	out.print  ("literal interpretation of its name might indicate; coded\n");
	out.print  ("representation, name, and representative glyph need\n");
	out.print  ("to be taken in context when establishing the semantics of a\n");
	out.print  ("character.</div></blockquote>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"repdef\">Glyph variation</a></h3>\n");
	out.print  ("\n");
	out.print  ("<P>\n");
	out.print  ("When a character repertoire is defined\n");
	out.print  ("(e.g. in a standard),\n");
	out.print  ("<EM>some</EM> particular\n");
	out.print  ("glyph is often used to describe the appearance of each character, but this\n");
	out.print  ("should be taken as an example only.\n");
	out.print  ("The <a href=\"#10646\">Unicode</a> standard specifically says (in\n");
	out.print  ("section&nbsp;3.2) that\n");
	out.print  ("great variation is allowed between\n");
	out.print  ("\"representative\n");
	out.print  ("glyph\" appearing in the standard and\n");
	out.print  ("a glyph used for the corresponding character:\n");
	out.print  ("<BLOCKQUOTE><P>\n");
	out.print  ("Consistency with the representative glyph does not require that\n");
	out.print  ("the images be identical or even graphically similar; rather, it\n");
	out.print  ("means that both images are generally recognized to be\n");
	out.print  ("representations of the same character. Representing the\n");
	out.print  ("character U+0061 <span class=\"charname\">Latin small letter a</span>\n");
	out.print  ("by the glyph \"X\" would violate its character identity.\n");
	out.print  ("</P></BLOCKQUOTE>\n");
	out.print  ("<p>Thus, the definition of a repertoire is not a matter of just\n");
	out.print  ("listing <em>glyphs</em>, but neither is it a matter of defining exactly\n");
	out.print  ("the <em>meanings</em> of characters.\n");
	out.print  ("It's actually an exception\n");
	out.print  ("rather than a rule that a character repertoire definition\n");
	out.print  ("explicitly says something about the meaning and use of a character.\n");
	out.print  ("<p>\n");
	out.print  ("<a name=\"prop\">Possibly some <em>specific properties</em></a>\n");
	out.print  ("(e.g. being classified\n");
	out.print  ("as a letter or having numeric value in the sense that digits have)\n");
	out.print  ("are defined, as in the <a href=\"#unidata\">Unicode database</a>,\n");
	out.print  ("but such properties are rather general in nature.\n");
	out.print  ("<p>\n");
	out.print  ("This vagueness may sound irritating,\n");
	out.print  ("and it often is. But an essential point to be noted is that\n");
	out.print  ("<strong>quite a lot of information is implied</strong>.\n");
	out.print  ("You are expected to deduce what the character is, using both\n");
	out.print  ("the character name and its representative glyph, and perhaps\n");
	out.print  ("context too, like the grouping of characters under different\n");
	out.print  ("headings like \"currency symbols\".\n");
	out.print  ("</p>\n");
	out.print  ("<P>\n");
	out.print  ("For more information on the glyph concept, see \n");
	out.print  ("the document\n");
	out.print  ("<cite><a href=\"http://www.iso.ch/iso/en/ittf/PubliclyAvailableStandards/C027163e.zip\"\n");
	out.print  (">An operational model for characters and glyphs</a></cite>\n");
	out.print  ("(ISO/IEC TR 15285:1998)\n");
	out.print  ("and\n");
	out.print  ("Apple's\n");
	out.print  ("document\n");
	out.print  ("<!--\"http://developer.apple.com/techpubs/mac/TextEncodingCMgr/TECRefBook-141.html#HEADING141-12\"-->\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://developer.apple.com/techpubs/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.a1.html\"\n");
	out.print  (">Characters, Glyphs, and Related Terms</a>\n");
	out.print  ("<!-- http://www.stonehand.com/unicode/standard/cgmodel.html\n");
	out.print  ("Character/Glyph Model -->\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"font\">Fonts</a></h3>\n");
	out.print  ("<P>\n");
	out.print  ("A repertoire of\n");
	out.print  ("<a href=\"#glyph\">glyph</a>s comprises a\n");
	out.print  ("<DFN>font</DFN>.\n");
	out.print  ("In a more technical sense, as the implementation of a font,\n");
	out.print  ("a font is a <em>numbered</em> set of glyphs.\n");
	out.print  ("The numbers correspond to <a href=\"#code\">code positions</a>\n");
	out.print  ("of the characters (presented by the glyphs).\n");
	out.print  ("Thus, a font in that sense is character code dependent.\n");
	out.print  ("An expression like \"Unicode font\" refers to such issues and\n");
	out.print  ("does not imply that the font contains glyphs for <em>all</em>\n");
	out.print  ("<a href=\"#10646\">Unicode</a> characters.\n");
	out.print  ("<p>\n");
	out.print  ("It is possible that\n");
	out.print  ("a font which is used for the presentation of some character\n");
	out.print  ("repertoire\n");
	out.print  ("does not contain\n");
	out.print  ("a <em>different</em> glyph for each character.\n");
	out.print  ("For example, although characters\n");
	out.print  ("such as Latin uppercase A,\n");
	out.print  ("Cyrillic uppercase A, and\n");
	out.print  ("Greek uppercase alpha are regarded as distinct characters\n");
	out.print  ("(with distinct code values) in\n");
	out.print  ("<a href=\"#10646\">Unicode</a>, a particular font might contain\n");
	out.print  ("just one A which is used to present all of them.\n");
	out.print  ("(For information about fonts, there is a very large\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://www.faqs.org/faqs/by-newsgroup/comp/comp.fonts.html\"\n");
	out.print  ("TITLE=\"Comp.fonts Newsgroup FAQs\"\n");
	out.print  ("><CITE>comp.fonts FAQ</CITE></A>, but it's rather old: last update in 1996.\n");
	out.print  ("The\n");
	out.print  ("<!--\"http://www.faqs.org/faqs/internationalization/font-faq/\"-->\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.cs.uu.nl/wais/html/na-dir/internationalization/font-faq.html\"\n");
	out.print  ("><cite\n");
	out.print  (">Finding Fonts for Internationalization FAQ</cite></a> is dated, too.)\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("<A NAME=\"look\">\n");
	out.print  ("<EM>You should never use a character just because it\n");
	out.print  ("\"looks right\"</EM> or \"almost right\".</A>\n");
	out.print  ("Characters with quite different purposes and meanings may well\n");
	out.print  ("look similar, or almost similar, in some\n");
	out.print  ("<a href=\"#font\">font</a>s at least.\n");
	out.print  ("Using a character as a surrogate for another for the sake of\n");
	out.print  ("apparent similarity may lead to great confusion.\n");
	out.print  ("Consider, for example, the so-called sharp&nbsp;s (es-zed), which is\n");
	out.print  ("used in the German language.\n");
	out.print  ("Some people who have noticed such a character in the\n");
	out.print  ("<a href=\"#latin1\">ISO Latin 1</a> repertoire have thought\n");
	out.print  ("\"wow, here we have the beta character!\".\n");
	out.print  ("In many fonts, the sharp s\n");
	out.print  ("(&#223;)\n");
	out.print  ("really looks more or less like the\n");
	out.print  ("Greek lowercase\n");
	out.print  ("beta character (&#946;).\n");
	out.print  ("But it <em>must not</em> be used as a\n");
	out.print  ("surrogate for beta. You wouldn't get very far with it, really;\n");
	out.print  ("what's the big idea of having beta without alpha and all the other\n");
	out.print  ("Greek letters?\n");
	out.print  ("More seriously, the use of sharp s in place of beta would confuse\n");
	out.print  ("text searches, spelling checkers, speech synthesizers,\n");
	out.print  ("indexers,\n");
	out.print  ("etc.; an automatic converter\n");
	out.print  ("might well turn sharp s into ss; and some font might present\n");
	out.print  ("sharp s in a manner which is very different from beta.\n");
	out.print  ("</P>\n");
	out.print  ("<P>For some more explanations on this, see\n");
	out.print  ("<a href=\"latin1/5.html#why\"\n");
	out.print  (">section\n");
	out.print  ("<cite>Why should we be so strict about meanings of characters?</cite></a>\n");
	out.print  ("in\n");
	out.print  ("<a href=\"latin1/\"><cite>The\n");
	out.print  ("ISO Latin 1 character repertoire - a description with usage notes</cite></a>.\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"identity\">Identity of characters: a matter of definition</a></h3>\n");
	out.print  ("<P>\n");
	out.print  ("The\n");
	out.print  ("<dfn>identity of characters</dfn> is defined by the\n");
	out.print  ("<a href=\"#repdef\" title=\"How character repertoires are defined\">definition</a>\n");
	out.print  ("of a\n");
	out.print  ("<a href=\"#repertoire\">character repertoire</a>. Thus, it is not an absolute\n");
	out.print  ("concept but relative to the repertoire; some repertoire might contain\n");
	out.print  ("a character with mixed usage while another defines distinct characters\n");
	out.print  ("for the different uses.\n");
	out.print  ("For instance, the \n");
	out.print  ("<A HREF=\"#ascii\">ASCII</A>\n");
	out.print  ("repertoire has a character called <strong>hyphen</strong>. It is also used as\n");
	out.print  ("a minus sign (as well as a substitute for a dash, since ASCII\n");
	out.print  ("contains no dashes). Thus, that ASCII character is a generic,\n");
	out.print  ("multipurpose character, and one can say that in ASCII hyphen and\n");
	out.print  ("minus are identical.\n");
	out.print  ("But in\n");
	out.print  ("<a href=\"#10646\">Unicode</a>, there are distinct characters\n");
	out.print  ("named \"hyphen\" and \"minus sign\" (as well as different\n");
	out.print  ("dash characters).\n");
	out.print  ("For compatibility,\n");
	out.print  ("the old ASCII character is preserved in Unicode, too\n");
	out.print  ("(in the old code position, with the name\n");
	out.print  ("<a href=\"latin1/3.html#2D\"\n");
	out.print  ("><span class=\"charname\">hyphen-minus</span></a>).</P>\n");
	out.print  ("<P>\n");
	out.print  ("<a name=\"greek\">Similarly, as a matter of definition,</a>\n");
	out.print  ("<a href=\"#10646\">Unicode</a>\n");
	out.print  ("defines characters for\n");
	out.print  ("<a href=\"latin1/3.html#B5\"\n");
	out.print  ("><span class=\"charname\">micro sign</span></a>,\n");
	out.print  ("<A href=\"http://www.eki.ee/letter/chardata.cgi?ucode=220F\"\n");
	out.print  ("><SPAN class=\"charname\">n-ary product</SPAN></a>,\n");
	out.print  ("etc.,\n");
	out.print  ("as distinct from the <strong>Greek letters</strong>\n");
	out.print  ("(small mu, capital pi, etc.) they originate from.\n");
	out.print  ("This is a logical distinction and does not necessarily imply\n");
	out.print  ("that different glyphs are used. The distinction is important e.g.\n");
	out.print  ("when textual data in digital form\n");
	out.print  ("is processed by a program (which \"sees\" the code values, through\n");
	out.print  ("some encoding, and not the glyphs at all).\n");
	out.print  ("Notice that Unicode does not make any distinction\n");
	out.print  ("e.g.\n");
	out.print  ("between the\n");
	out.print  ("<A href=\"http://www.eki.ee/letter/chardata.cgi?ucode=03C0\"\n");
	out.print  ("><SPAN class=\"charname\">greek small letter pi</SPAN></a>&nbsp;(&#960;),\n");
	out.print  ("and the mathematical symbol pi denoting the\n");
	out.print  ("well-known constant 3.14159...\n");
	out.print  ("(i.e. there is no separate symbol for the latter).\n");
	out.print  ("For the\n");
	out.print  ("<A href=\"http://www.eki.ee/letter/chardata.cgi?ucode=2126\"\n");
	out.print  ("><SPAN class=\"charname\">ohm sign</SPAN></a>&nbsp;(&#8486;),\n");
	out.print  ("there is a specific character (in the Symbols Area),\n");
	out.print  ("but it is defined as being\n");
	out.print  ("canonical equivalent\n");
	out.print  ("to\n");
	out.print  ("<A href=\"http://www.eki.ee/letter/chardata.cgi?ucode=03A9\"\n");
	out.print  ("><SPAN class=\"charname\">greek capital letter omega</SPAN></a>&nbsp;(&#937;),\n");
	out.print  ("i.e. there are two separate characters but they are equivalent).\n");
	out.print  ("On the other hand, it makes\n");
	out.print  ("a distinction between\n");
	out.print  ("<A href=\"http://www.eki.ee/letter/chardata.cgi?ucode=03A0\"\n");
	out.print  ("><SPAN class=\"charname\">greek capital letter pi</SPAN></a>&nbsp;(&#928;)\n");
	out.print  ("and the\n");
	out.print  ("mathematical symbol\n");
	out.print  ("<A href=\"http://www.eki.ee/letter/chardata.cgi?ucode=220F\"\n");
	out.print  ("><SPAN class=\"charname\">n-ary product</SPAN></a>&nbsp;(&#8719;),\n");
	out.print  ("so that they are <em>not</em> equivalents.</p>\n");
	out.print  ("<p>If you think this doesn't sound quite logical, you are not\n");
	out.print  ("the only one to think so. But the point is that for\n");
	out.print  ("symbols resembling Greek letter and used in various contexts,\n");
	out.print  ("there are three possibilities in Unicode:\n");
	out.print  ("<ul class=\"emb\">\n");
	out.print  ("<li> the symbol is regarded as identical to the Greek letter (just\n");
	out.print  ("     as its particular <em>usage</em>)\n");
	out.print  ("<li> the symbol is included as a separate character\n");
	out.print  ("     but only for compatibility and as compatibility equivalent to the Greek\n");
	out.print  ("     letter\n");
	out.print  ("<li> the symbol is regarded as a completely separate character.\n");
	out.print  ("</ul>\n");
	out.print  ("<p>You need to check the\n");
	out.print  ("<a href=\"#unicode-ref\">Unicode references</a>\n");
	out.print  ("for information about each individual symbol.\n");
	out.print  ("Note in particular that a query to \n");
	out.print  ("Indrek Hein's\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/\">online character database</a>\n");
	out.print  ("will give such information in the\n");
	out.print  ("<cite>decomposition info</cite> part (but only in the entries for\n");
	out.print  ("compatibility characters!).\n");
	out.print  ("As a rough <em>rule of thumb</em>\n");
	out.print  ("about symbols looking like Greek letters,\n");
	out.print  ("mathematical <em>operators</em>\n");
	out.print  ("(like summation) exist as independent characters whereas\n");
	out.print  ("symbols of <em>quantities and units</em>\n");
	out.print  ("(like pi and ohm)\n");
	out.print  ("are equivalent or identical to Greek letters.\n");
	out.print  ("</p>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"display\">Failures to display a character</a></h3>\n");
	out.print  ("\n");
	out.print  ("<p>In addition to the fact that\n");
	out.print  ("<a href=\"#glyph\">the appearance of a character may vary</a>, it\n");
	out.print  ("is quite possible that\n");
	out.print  ("some program <strong>fails to display a character at all</strong>.\n");
	out.print  ("Perhaps the program cannot interpret a particular way in which\n");
	out.print  ("the character is presented. The reason might simply be that some\n");
	out.print  ("<a href=\"#esc\">program-specific way</a> had been used to denote\n");
	out.print  ("the character and a different program is in use now. (This happens\n");
	out.print  ("quite often even if \"the same\" program is used; for example,\n");
	out.print  ("Internet Explorer version 4.0 is able to recognize\n");
	out.print  ("<code>&amp;alpha;</code> as denoting the Greek letter alpha\n");
	out.print  ("(&#945;)\n");
	out.print  ("but IE 3.0\n");
	out.print  ("is not and displays the notation literally.) \n");
	out.print  ("And naturally it often occurs that a program does not\n");
	out.print  ("recognize the basic <a href=\"#encoding\">character encoding</a> of the\n");
	out.print  ("data, either because it was not properly informed about the encoding\n");
	out.print  ("according to which the data should be interpreted or because\n");
	out.print  ("it has not been programmed to handle the particular encoding in use.\n");
	out.print  ("</p>\n");
	out.print  ("<p>But even if a program <em>recognizes</em> some data as denoting\n");
	out.print  ("a character, it may well be unable to display it since it lacks\n");
	out.print  ("a <a href=\"#glyph\">glyph</a> for it. Often it will help if the\n");
	out.print  ("user manually checks the <a href=\"#font\">font</a> settings,\n");
	out.print  ("perhaps manually trying to find a rich enough font.\n");
	out.print  ("(Advanced programs could be expected to do this automatically\n");
	out.print  ("and even to pick up glyphs from different fonts, but such\n");
	out.print  ("expectations are mostly unrealistic at present.) \n");
	out.print  ("But it's quite possible that no such font can be found.\n");
	out.print  ("As an important detail,\n");
	out.print  ("the possibility of seeing e.g. Greek characters on some Windows systems\n");
	out.print  ("depends on whether \"internationalization support\"\n");
	out.print  ("has been installed.\n");
	out.print  ("</p>\n");
	out.print  ("<p>A well-design program will in some appropriate way\n");
	out.print  ("indicate its inability to display a character. For example,\n");
	out.print  ("a small rectangular box, the size of a character, could be used\n");
	out.print  ("to indicate that there is a character which was recognized but\n");
	out.print  ("cannot be displayed. Some programs use a question mark, but this\n");
	out.print  ("is risky&nbsp;- how is the reader expected to distinguish\n");
	out.print  ("such usage from the real \"?\" character?\n");
	out.print  ("</p>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"math\">Linear text vs. mathematical notations</a></h3>\n");
	out.print  ("\n");
	out.print  ("<p><img src=\"HTML3.2/integral.gif\" title=\n");
	out.print  ("\"A sample image: an integral, produced using TeX\"\n");
	out.print  ("alt=\"\" align=\"right\">\n");
	out.print  ("Although\n");
	out.print  ("several character <a href=\"#repertoire\">repertoires</a>,\n");
	out.print  ("most notably that of <a href=\"#10646\">ISO 10646 and Unicode</a>,\n");
	out.print  ("contain <strong>mathematical</strong> and other symbols,\n");
	out.print  ("the presentation of mathematical <strong>formulas</strong>\n");
	out.print  ("is essentially not a character level problem.\n");
	out.print  ("At the character level, symbols like\n");
	out.print  ("integration or <var>n</var>-ary summation can be defined\n");
	out.print  ("and their <a href=\"#code\">code positions</a> and\n");
	out.print  ("<a href=\"#encoding\">encodings</a> defined, and representative\n");
	out.print  ("<a href=\"#glyph\">glyphs</a> shown, and perhaps some usage notes\n");
	out.print  ("given. But the construction of real formulas, e.g. for\n");
	out.print  ("a definite integral of a function, is a different thing,\n");
	out.print  ("no matter whether one considers formulas abstractly\n");
	out.print  ("(how the structure of the formula is given) or presentationally\n");
	out.print  ("(how the formula is displayed on paper or on screen).\n");
	out.print  ("To mention just a few approaches to such issues,\n");
	out.print  ("the <a href=\"http://www.tex.ac.uk/cgi-bin/texfaq2html\"\n");
	out.print  ("title=\"TeX Frequently Asked Questions\"\n");
	out.print  ("       >TeX</a>\n");
	out.print  ("system is widely used by mathematicians to produce high-quality\n");
	out.print  ("presentations of formulas, and\n");
	out.print  ("<a href=\"http://www.w3.org/Math/\">MathML</a> is an ambitious\n");
	out.print  ("project for creating a markup language for mathematics so that\n");
	out.print  ("both structure and presentation can be handled.\n");
	out.print  ("<br clear=\"all\">\n");
	out.print  ("</p>\n");
	out.print  ("<p><a name=\"plain\">In other respects, too, character\n");
	out.print  ("standards usually deal with <strong>plain text</strong> only.</a>\n");
	out.print  ("Other structural or presentational aspects, such\n");
	out.print  ("as <a href=\"#font\">font</a> variation, are to be handled\n");
	out.print  ("separately. However, there are characters which would <em>now</em>\n");
	out.print  ("be considered as differing in font only but\n");
	out.print  ("for historical reasons regarded as distinct. \n");
	out.print  ("</p>\n");
	out.print  ("<h3><a name=\"compat\">Compatibility characters</a></h3>\n");
	out.print  ("<p>\n");
	out.print  ("There is a large number\n");
	out.print  ("of <dfn>compatibility characters</dfn> in\n");
	out.print  ("<a href=\"#10646\">ISO 10646 and Unicode</a>\n");
	out.print  ("which are variants of other characters.\n");
	out.print  ("They were included for compatibility with other standards\n");
	out.print  ("so that data presented using some other\n");
	out.print  ("<a href=\"#code\">code</a> can be converted to ISO 10646 and back\n");
	out.print  ("without losing information.\n");
	out.print  ("The Unicode standard says (in section&nbsp;2.4):\n");
	out.print  ("<blockquote>\n");
	out.print  ("Compatibility characters are those that would not have been encoded except for compatibility\n");
	out.print  ("and round-trip convertibility with other standards. They are variants of characters\n");
	out.print  ("that already have encodings as \n");
	out.print  ("<em>normal</em> (that is, non-compatibility) characters in the Unicode\n");
	out.print  ("Standard.\n");
	out.print  ("</blockquote>\n");
	out.print  ("<P>There is a large number of compatibility characters in the\n");
	out.print  ("<a href=\"ucs.html8#compat\"\n");
	out.print  (">Compatibility Area</a> but also scattered around the\n");
	out.print  ("Unicode space.</p>\n");
	out.print  ("<p>Many, but not all, compatibility characters have \n");
	out.print  ("<em>compatibility decompositions</em>.\n");
	out.print  (" The\n");
	out.print  ("<a href=\"#unicode-ref\" title=\n");
	out.print  ("\"Reference information on ISO 10646 and Unicode\"\n");
	out.print  (">Unicode database</a> contains, for each character,\n");
	out.print  ("a field (the sixth one)\n");
	out.print  ("which specifies\n");
	out.print  (" its eventual compatibility decomposition.</p>\n");
	out.print  ("<p>\n");
	out.print  ("Thus, to take a simple example,\n");
	out.print  ("<a href=\"latin1/3.html#B2\"><span class=\"charname\">superscript\n");
	out.print  ("two</span>&nbsp;(²)</a> is an\n");
	out.print  ("<a href=\"#latin1\">ISO Latin&nbsp;1</a> character with its own\n");
	out.print  ("code position in that standard. In ISO 10646 way of thinking,\n");
	out.print  ("it would have been treated as just a superscript variant of\n");
	out.print  ("<span class=\"charname\">digit two</span>. But since the character is\n");
	out.print  ("contained in an important standard, it was included into ISO 10646,\n");
	out.print  ("though only as a \"compatibility character\". The practical reason\n");
	out.print  ("is that now one can convert from ISO Latin 1 to ISO 10646 and back\n");
	out.print  ("and get the original data.\n");
	out.print  ("This does not mean that in the ISO 10646 philosophy superscripting\n");
	out.print  ("(or subscripting, italics, bolding etc.) would be irrelevant;\n");
	out.print  ("rather, they are to be <em>handled at another level</em>\n");
	out.print  ("of data presentation, such as some special\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=markup\"\n");
	out.print  ("title=\"What is markup (a definition)\"\n");
	out.print  (">markup</a>.\n");
	out.print  ("</p>\n");
	out.print  ("<p>There is a document\n");
	out.print  ("titled\n");
	out.print  ("<cite><a href=\"http://www.w3.org/TR/unicode-xml/\"\n");
	out.print  (">Unicode in XML and other Markup Languages</a></cite> and\n");
	out.print  (" produced jointly by the World Wide Web Consortium\n");
	out.print  ("(<a href=\"http://www.w3.org\">W3C</a>) and the Unicode Consortium.\n");
	out.print  ("It discusses, among other things, \n");
	out.print  ("<a href=\"http://www.w3.org/TR/unicode-xml/#Compatibility\"\n");
	out.print  (">characters with compatibility mappings</a>: should they be used,\n");
	out.print  ("or should the corresponding non-compatibility characters be used,\n");
	out.print  ("perhaps with some markup and/or style sheet\n");
	out.print  ("that corresponds to the difference\n");
	out.print  ("between them. The answers depend on the nature of the characters and \n");
	out.print  ("the available markup and styling techniques. For example,\n");
	out.print  ("for superscripts, the use of <code>sup</code> markup (as in HTML)\n");
	out.print  ("is recommended, i.e. <code>&lt;sup&gt;2&lt;/sup&gt;</code>\n");
	out.print  ("is preferred over&nbsp;sup2; This is a debatable issue; see my\n");
	out.print  ("<a href=\"HTML3.2/SUBSUP.html\"\n");
	out.print  (">notes on <code>sup</code> and <code>sub</code> markup</a>.</p>\n");
	out.print  ("\n");
	out.print  ("<p>The definition of Unicode indicates our sample character,\n");
	out.print  ("<a href=\"latin1/3.html#B2\"><span class=\"charname\">superscript\n");
	out.print  ("two</span></a>, as a compatibility character with the\n");
	out.print  ("<dfn>compatibility decomposition</dfn> \"&lt;super&gt;&nbsp;+&nbsp;0032&nbsp;2\".\n");
	out.print  ("Here \"&lt;super&gt;\" is a semi-formal way of referring to what is considered\n");
	out.print  ("as typographic variation, in this case superscript style, and \"0032 2\"\n");
	out.print  ("shows the hexadecimal code of a character and the character itself.\n");
	out.print  ("<p>\n");
	out.print  ("<a name=\"ligature\">Some</a>\n");
	out.print  ("<a href=\"#compat\">compatibility characters</a> have compatibility decompositions\n");
	out.print  ("consisting of several characters.\n");
	out.print  ("Due to this property, they can be said to represent\n");
	out.print  ("<dfn>ligatures</dfn> in the broad sense.\n");
	out.print  ("For example,\n");
	out.print  ("<span class=\"charname\">latin small ligature fi</span>\n");
	out.print  ("(<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=FB01\"><code>U+FB01</code></a>)\n");
	out.print  ("has the obvious decomposition consisting of letters \"f\" and \"i\".\n");
	out.print  ("It is still a distinct character in Unicode, but in the spirit of\n");
	out.print  ("<a href=\"#10646\">Unicode</a>,\n");
	out.print  ("we should not use it except for storing and transmitting existing data which\n");
	out.print  ("contains that character. Generally, ligature issues should be handled\n");
	out.print  ("outside the character level, e.g. selected automatically by a formatting program\n");
	out.print  ("or indicated using some suitable\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=markup\"\n");
	out.print  ("title=\"What is markup (a definition)\"\n");
	out.print  (">markup</a>.\n");
	out.print  ("</p><p>\n");
	out.print  ("Note that the <em>word</em> ligature can be misleading when it appears\n");
	out.print  ("in a character name. In particular, the old name of the character \"æ\",\n");
	out.print  ("<span class=\"charname\">latin small letter ae</span>\n");
	out.print  ("(<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=00E6\"><code>U+00E6</code></a>),\n");
	out.print  ("is <span class=\"charname\">latin small ligature ae</span>, but it is <em>not</em>\n");
	out.print  ("a ligature of \"a\" and \"e\" in the sense described above. It has no\n");
	out.print  ("compatibility decomposition.\n");
	out.print  ("</p>\n");
	out.print  ("<p>In <a href=\"http://www.faqs.org/faqs/fonts-faq/part2/\">\n");
	out.print  ("<cite>comp.fonts FAQ, General Info (2/6)</cite></a>\n");
	out.print  ("section <cite>1.15 Ligatures</cite>,\n");
	out.print  ("the term <dfn>ligature</dfn> is defined as follows:\n");
	out.print  ("<blockquote>\n");
	out.print  ("<small>\n");
	out.print  ("A ligature occurs where two or more letterforms are written or printed\n");
	out.print  ("  as a unit.  Generally, ligatures replace characters that occur next to\n");
	out.print  ("  each other when they share common components.  Ligatures are a subset\n");
	out.print  ("  of a more general class of figures called \"contextual forms.\"</small></blockquote>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"compose\">Compositions and decompositions</a></h3>\n");
	out.print  ("\n");
	out.print  ("<p><a name=\"diacritic\">A <dfn>diacritic mark</dfn></a>, i.e. an additional\n");
	out.print  ("graphic such as an accent or cedilla attached to a character,\n");
	out.print  ("can be treated\n");
	out.print  ("in different ways when defining a character repertoire.\n");
	out.print  ("See some\n");
	out.print  ("<a href=\"latin1/4.html#diacr\"\n");
	out.print  ("title=\"Diacritics (accents etc.) and letters with them\"\n");
	out.print  (">historical notes</a>\n");
	out.print  ("on this in my <a href=\"latin1/\" title=\n");
	out.print  ("\"The ISO Latin 1 character repertoire - a description with usage notes\"\n");
	out.print  (">description of ISO Latin&nbsp;1</a>.\n");
	out.print  ("It also explains why the so-called <em>spacing</em> diacritic marks are of very\n");
	out.print  ("limited usefulness, except when taken into some secondary usage.\n");
	out.print  ("<p>In the\n");
	out.print  ("<a href=\"#10646\">Unicode</a> approach,\n");
	out.print  ("there are\n");
	out.print  ("separate characters called\n");
	out.print  ("<dfn><a href=\"http://www.unicode.org/charts/U0300.pdf\"\n");
	out.print  ("title=\"Combining Diacritical Marks (Unicode block U+0300 to U+036F)\"\n");
	out.print  (">combining diacritical marks</a></dfn>.\n");
	out.print  ("The general idea is that you can express a vast set of characters with diacritics\n");
	out.print  ("by representing them so that a base character is followed by one or more&nbsp;(!)\n");
	out.print  ("combining (non-spacing) diacritic marks.\n");
	out.print  ("And a program which <em>displays</em> such a construct is expected to do rather\n");
	out.print  ("clever things in formatting, e.g. selecting a particular shape for the\n");
	out.print  ("diacritic according to the shape of the base character.\n");
	out.print  ("This requires Unicode support at\n");
	out.print  ("<a href=\"http://www.nada.kth.se/i18n/ucs/unicode-iso10646-oview.html#3\"\n");
	out.print  (">implementation level</a>&nbsp;3. Most programs currently in use are\n");
	out.print  ("totally incapable of doing anything meaningful with combining diacritic marks.\n");
	out.print  ("But there is some simple support to them in Internet Explorer for example,\n");
	out.print  ("though you would need a font which contains the combining diacritics\n");
	out.print  ("(such as\n");
	out.print  ("<!--\"http://office.microsoft.com/downloads/2000/aruniupd.aspx\"-->\n");
	out.print  ("Arial Unicode MS); then IE can handle simple combinations reasonably.\n");
	out.print  ("See\n");
	out.print  ("<a href=\"http://www.alanwood.net/unicode/combining_diacritical_marks.html\"\n");
	out.print  (">test page for combining diacritic marks</a> in\n");
	out.print  ("<a href=\"http://www.alanwood.net/unicode/index.html\"\n");
	out.print  (">Alan Wood's Unicode resources</a>.\n");
	out.print  ("Regarding advanced implementation of the rendering of characters\n");
	out.print  ("with diacritic marks, consult Unicode Technical Note&nbsp;#2,\n");
	out.print  ("<cite><a href=\"http://www.unicode.org/notes/tn2/\"\n");
	out.print  (">A General Method for Rendering\n");
	out.print  ("Combining Marks</a></cite>.</p>\n");
	out.print  ("\n");
	out.print  ("<p>Using combining diacritic marks, we have wide range of\n");
	out.print  ("possibilities. We can put, say,\n");
	out.print  ("a diaeresis on a gamma, although \"Greek small letter\n");
	out.print  ("gamma with diaeresis\" does not exist <em>as a character</em>.\n");
	out.print  ("The combination <code>U+03B3 U+0308</code> consists of two\n");
	out.print  ("characters, although its visual presentation looks like a single\n");
	out.print  ("character in the same sense as \"ä\" looks like a single character.\n");
	out.print  ("This is how your browser displays the combination:\n");
	out.print  ("\"&#947;&#776;\". In most browsing situations at present, it probably\n");
	out.print  ("isn't displayed correctly; you might see e.g. the letter gamma\n");
	out.print  ("followed by a box that indicates a missing glyph, or you might see\n");
	out.print  ("gamma followed by a diaeresis shown separately&nbsp;(¨).</p> \n");
	out.print  ("\n");
	out.print  ("<p>Thus, in practical terms, in order to use a character with a diacritic\n");
	out.print  ("mark, you should primarily \n");
	out.print  ("try to find it as a <dfn>precomposed</dfn> character.\n");
	out.print  ("A&nbsp;precomposed character, also called\n");
	out.print  ("<dfn>composite character</dfn> or <dfn>decomposable character</dfn>,\n");
	out.print  ("is one that has a\n");
	out.print  ("<a href=\"#code\">code position</a> (and thereby\n");
	out.print  ("<a href=\"#identity\">identity</a>)\n");
	out.print  ("of its own but is in some sense equivalent to a sequence of other\n");
	out.print  ("characters.\n");
	out.print  ("There are lots of them in Unicode, and they cover the needs of most (but not all)\n");
	out.print  ("languages of the world, but not e.g. the presentation of\n");
	out.print  ("the <a href=\"http://www.arts.gla.ac.uk/IPA/ipachart.html\"\n");
	out.print  ("       >International phonetic alphabet</a>\n");
	out.print  ("by\n");
	out.print  ("<a href=\"http://www.arts.gla.ac.uk/IPA/ipa.html\"\n");
	out.print  ("title=\"International Phonetic Association\"\n");
	out.print  ("       >IPA</a> which, in its general form, requires several different \n");
	out.print  ("diacritic marks. For example,\n");
	out.print  ("the character\n");
	out.print  ("<span class=\"charname\">latin small letter a with diaeresis</span>\n");
	out.print  ("(<a href=\"latin1/3.html#E4\"\n");
	out.print  ("><code>U+00E4</code></a>,&nbsp;ä) is, by Unicode definition, decomposable\n");
	out.print  ("to the sequence of the two characters\n");
	out.print  ("<span class=\"charname\">latin small letter a</span>\n");
	out.print  ("(<a href=\"latin1/3.html#61\"\n");
	out.print  ("><code>U+0061</code></a>) and\n");
	out.print  ("<span class=\"charname\">combining diaeresis</span>\n");
	out.print  ("(<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=0308\"><code>U+0308</code></a>).\n");
	out.print  ("This is at present mostly a theoretic possibility.\n");
	out.print  ("Generally by decomposing all decomposable characters one could\n");
	out.print  ("in many cases simplify the processing of textual data (and the\n");
	out.print  ("resulting data might be converted back to a format using precomposed\n");
	out.print  ("characters). See e.g. the working draft \n");
	out.print  ("<a href=\"http://www.w3.org/TR/WD-charmod\"\n");
	out.print  ("><cite>Character Model for the World Wide Web</cite></a>.\n");
	out.print  ("</p>\n");
	out.print  ("\n");
	out.print  ("\n");
	out.print  ("<H2><A NAME=\"typing\">Typing characters</A></H2>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"key\">Just pressing a key?</a></h3>\n");
	out.print  ("<P>\n");
	out.print  ("Typing characters on a computer may appear deceptively simple:\n");
	out.print  ("you press a key labeled \"A\", and the character \"A\" appears on the\n");
	out.print  ("screen. Well, you actually get uppercase \"A\" or lowercase \"a\" \n");
	out.print  ("depending on whether you used the shift key or not, but that's common\n");
	out.print  ("knowledge.\n");
	out.print  ("You also expect \"A\" to be included into a disk file when\n");
	out.print  ("you save what you are typing,\n");
	out.print  ("you expect \"A\" to appear on paper if you print your text,\n");
	out.print  ("and you expect \"A\" to be sent if\n");
	out.print  ("you send your product by E-mail or something like that.\n");
	out.print  ("And you expect the recipient to see an&nbsp;\"A\".\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Thus far, you should have learned that the presentation of\n");
	out.print  ("a character in computer storage or disk or in data transfer may vary\n");
	out.print  ("a lot. You have probably realized that especially if it's not\n");
	out.print  ("the common \"A\" but something more special (say, an \"A\" with an accent),\n");
	out.print  ("strange things might happen, especially if data is not accompanied\n");
	out.print  ("with adequate\n");
	out.print  ("<a href=\"#encinfo\">information about its encoding</a>.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("But you might still be too confident. You probably expect that\n");
	out.print  ("on <em>your</em> system at least things are simpler than that.\n");
	out.print  ("If you use your very own very personal computer and press the\n");
	out.print  ("key labeled \"A\" on <em>its</em> keyboard, then shouldn't it be\n");
	out.print  ("evident that in <em>its</em> storage and processor, on <em>its</em>\n");
	out.print  ("disk, on <em>its</em> screen it's invariably \"A\"? Can't you just\n");
	out.print  ("ignore its internal character code and character encoding?\n");
	out.print  ("Well, probably yes - with \"A\". I wouldn't be so sure about \"Ä\",\n");
	out.print  ("for instance. (On Windows systems, for example, DOS mode programs\n");
	out.print  ("differ from genuine Windows programs in this respect; they use a\n");
	out.print  ("<a href=\"#cp\">DOS character code</a>.)\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("When you press a key on your\n");
	out.print  ("<a href=\"http://webopedia.internet.com/TERM/k/keyboard.html\"\n");
	out.print  ("title=\"Webopedia entry for &quot;keyboard&quot;\">keyboard</a>,\n");
	out.print  ("then what actually happens is this.\n");
	out.print  ("The keyboard sends the code of a character to the processor. The\n");
	out.print  ("processor then, in addition to storing the data internally somewhere,\n");
	out.print  ("normally sends it to the display device.\n");
	out.print  ("(For more details on this, as regards to one common situation, see\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"http://www.pcguide.com/intro/works/exampl.htm\"\n");
	out.print  (">Example: What Happens When You Press A Key</a></cite>\n");
	out.print  ("in <cite><a href=\"http://www.pcguide.com/index.htm\"\n");
	out.print  (">The PC Guide</a></cite>.)\n");
	out.print  ("Now, the\n");
	out.print  ("<em>keyboard settings</em> and the <em>display settings</em> might\n");
	out.print  ("be different from what you expect. Even if a key is labeled \"Ä\",\n");
	out.print  ("it might send something else than the code of \"Ä\" in the character\n");
	out.print  ("code used in your computer. Similarly, the display device, upon\n");
	out.print  ("receiving such a code, might be set to display something different.\n");
	out.print  ("Such mismatches are usually undesirable, but they are definitely\n");
	out.print  ("<em>possible</em>.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("<a name=\"kbd\">Moreover, there are often <em>keyboard restrictions</em>.</a>\n");
	out.print  ("If your computer uses internally, say,\n");
	out.print  ("<a href=\"#latin1\">ISO Latin 1</a> character repertoire, you probably\n");
	out.print  ("won't find keys for all 191 characters in it on your keyboard.\n");
	out.print  ("And for <a href=\"#10646\">Unicode</a>, it would be quite\n");
	out.print  ("impossible to have a key for each character!\n");
	out.print  ("Different keyboards are used, often according to the needs of\n");
	out.print  ("particular languages. For example,\n");
	out.print  ("keyboards used in Sweden often have a key for the\n");
	out.print  ("<a href=\"latin1/3.html#E5\"\n");
	out.print  ("title=\"letter a with ring above\"\n");
	out.print  (">å</a> character\n");
	out.print  ("but seldom a key for&nbsp;<a\n");
	out.print  ("href=\"latin1/3.html#F1\"\n");
	out.print  ("title=\"letter n with tilde\"\n");
	out.print  (">ñ</a>; in Spain the opposite is true.\n");
	out.print  ("Quite often some keys have multiple uses via various\n");
	out.print  ("<a href=\"#keycomb\">\"composition\"</a> keys, as explained below. \n");
	out.print  ("For an illustration of the variation,\n");
	out.print  ("as well as to see what layout <em>might</em>  be used in some\n");
	out.print  ("environments, see\n");
	out.print  ("<ul><li>\n");
	out.print  ("<cite><a href=\"http://www.terena.nl/multiling/ml-mua/test/kbd-all.html\"\n");
	out.print  (">International Keyboards</a></cite>\n");
	out.print  ("at <a href=\"http://www.terena.nl/\"\n");
	out.print  ("title=\"TERENA : Trans-European Research and Education Networking Association\"\n");
	out.print  (">Terena</a>\n");
	out.print  ("     (contains some errors)\n");
	out.print  ("<li> <cite><a href=\n");
	out.print  ("\"http://www.hermessoft.com/newproject/lang.html\"\n");
	out.print  (">Keyboard\n");
	out.print  ("layouts</a></cite> by <a href=\"http://www.hermessoft.com/\">HermesSOFT</a>\n");
	out.print  ("<!--old: \"http://www.hermessoft.com/lang.html\"-->\n");
	out.print  ("<li> <cite><a href=\"http://www.sussex.ac.uk/USCS/Software/Keyboards/\"\n");
	out.print  ("     >Alternative Keyboard Layouts</a></cite> at\n");
	out.print  ("<a href=\"http://www.sussex.ac.uk/USCS/\"\n");
	out.print  ("title=\"SCS - University of Sussex Computing Service\"\n");
	out.print  ("     >USCC</a>\n");
	out.print  ("<li> <cite><a href=\"http://crl.nmsu.edu/~mleisher/keyboards/index.html\"\n");
	out.print  ("     >Keyboard layouts</a></cite> documented by\n");
	out.print  ("<a href=\"http://crl.nmsu.edu/~mleisher/\">Mark Leisher</a>; contains\n");
	out.print  ("     several layouts for \"exotic\" languages too\n");
	out.print  ("<li>The interactive\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"http://www.microsoft.com/globaldev/reference/keyboards.mspx\"\n");
	out.print  (">Windows\n");
	out.print  ("Layouts</a></cite> page by <a href=\"http://www.microsoft.com\">Microsoft</a>;\n");
	out.print  ("     requires Internet Explorer with JavaScript enabled.\n");
	out.print  ("(Actually, using it I found out new features\n");
	out.print  ("in the Finnish keyboard I have: I can use\n");
	out.print  (" Alt&nbsp;Gr&nbsp;m to produce the micro sign&nbsp;µ, although there\n");
	out.print  ("     is no hint about this in the \"m\" key itself.)\n");
	out.print  ("</ul>\n");
	out.print  ("\n");
	out.print  ("<p id=\"vkey\">In several systems, including\n");
	out.print  ("<abbr title=\"Microsoft\">MS</abbr> Windows,  \n");
	out.print  ("it is possible to switch between different\n");
	out.print  ("keyboard settings. This means that the effects of different keys do not\n");
	out.print  ("necessarily correspond to the engravings in the key caps but to\n");
	out.print  ("some other assignments. To ease typing in such situations,\n");
	out.print  ("\"virtual keyboards\" can be used. This means that an image of a\n");
	out.print  ("keyboard is visible on the screen, letting the user type\n");
	out.print  ("characters by clicking on keys in it or using the information\n");
	out.print  ("to see the current assignments of the keys of the physical\n");
	out.print  ("keyboard. For the Office software on Windows systems,\n");
	out.print  ("there is a free add-in available for this:\n");
	out.print  ("<a href=\"http://office.microsoft.com/downloads/2002/VkeyInst.aspx\"\n");
	out.print  (">Microsoft Visual Keyboard</a>.</p>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"pgm\">Program-specific methods for typing characters</a></h3>\n");
	out.print  ("<P>\n");
	out.print  ("Thus, you often need program-specific\n");
	out.print  ("ways of entering characters from a keyboard, either because\n");
	out.print  ("there is no key for a character you need or there is but it does\n");
	out.print  ("not work (properly). \n");
	out.print  ("The program involved might be part of system software, or it might be\n");
	out.print  ("an application program.\n");
	out.print  ("Three important <em>examples</em> of such ways:\n");
	out.print  ("</P>\n");
	out.print  ("<ul>\n");
	out.print  ("  <li> <a name=\"alt\">On <strong>Windows</strong> systems,</a>\n");
	out.print  ("       you can (usually&nbsp;- some application programs\n");
	out.print  ("       may override this) produce\n");
	out.print  ("       any character in the\n");
	out.print  ("       <a href=\"#win\">Windows character set</a> (naturally, in its\n");
	out.print  ("       Windows encoding) as follows:\n");
	out.print  ("       Press down the (left) <a href=\"#altkeys\">Alt&nbsp;key</a>\n");
	out.print  ("        and keep it down. Then type, using the\n");
	out.print  ("       separate\n");
	out.print  ("<a href=\"http://webopedia.internet.com/TERM/n/numeric_keypad.html\">numeric keypad</a>\n");
	out.print  ("       (not the numbers above the letter keys!),\n");
	out.print  ("       the four-digit code of the character in decimal.\n");
	out.print  ("       Finally release the Alt key.\n");
	out.print  ("              Notice that\n");
	out.print  ("       the first digit is always&nbsp;0, since\n");
	out.print  ("the code values are in the range 32&nbsp;-&nbsp;255 (decimal).\n");
	out.print  ("       For instance, to produce the letter \"Ä\"\n");
	out.print  ("(which has code 196 in decimal),\n");
	out.print  ("       you would press Alt down,\n");
	out.print  ("       type 0196 and then release Alt.\n");
	out.print  ("Upon releasing Alt, the character should appear on the screen.\n");
	out.print  ("In MS Word, the method works only if Num Lock is set.\n");
	out.print  ("       This method is often referred to\n");
	out.print  ("       as <dfn>Alt-<var>0nnn</var></dfn>.\n");
	out.print  ("(If you omit the leading zero, i.e. use\n");
	out.print  ("<dfn>Alt-<var>nnn</var></dfn>, the effect is <em>different</em>,\n");
	out.print  ("       since that way you insert the character in code position\n");
	out.print  ("<var>nnn</var> in the\n");
	out.print  ("<a href=\"#cp\"><em>DOS character code</em></a>!\n");
	out.print  ("For example, Alt-196 would probably insert a graphic\n");
	out.print  ("character which looks somewhat like a hyphen.\n");
	out.print  ("There are variations in the behavior of various Windows\n");
	out.print  ("programs in this area, and using those DOS codes is best\n");
	out.print  ("       avoided.)\n");
	out.print  ("  <li> <a name=\"cq\">In the</a>\n");
	out.print  ("       <a href=\n");
	out.print  ("\"http://www.gnu.org/manual/emacs/index.html\"\n");
	out.print  ("title=\"GNU Emacs Manual - ToC\"\n");
	out.print  ("       ><strong>Emacs</strong></a>\n");
	out.print  ("       editor\n");
	out.print  ("(which is popular especially on Unix systems),\n");
	out.print  ("you can produce\n");
	out.print  ("       any <a href=\"#latin1\">ISO Latin&nbsp;1</a> character\n");
	out.print  ("       by typing first\n");
	out.print  ("       control-Q, then its code as a three-digit\n");
	out.print  ("<A TITLE=\n");
	out.print  ("'description of octal notation (in Free On-line Dictionary of Computing)'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=octal&amp;action=Search\"\n");
	out.print  (">octal</A>\n");
	out.print  ("number. To produce \"Ä\", you would thus type control-Q followed by\n");
	out.print  ("the three digits 304 (and expect the \"Ä\" character to appear on screen).\n");
	out.print  ("       This method is often referred to\n");
	out.print  ("       as <dfn>C-Q-<var>nnn</var></dfn>.\n");
	out.print  ("There are\n");
	out.print  ("<A TITLE=\"Using iso-accents-mode in Emacs\"\n");
	out.print  (" HREF=\"emacs-iso.html\">\n");
	out.print  ("other ways of entering many ISO Latin 1 characters in Emacs</A>, too.\n");
	out.print  ("From version 23, there are good tools for\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://xahlee.org/emacs/emacs_n_unicode.html\"\n");
	out.print  ("title=\"Emacs and Unicode Tips, by Xah Lee\"\n");
	out.print  (">entering any character in Emacs</a>.\n");
	out.print  ("<li> <a name=\"fix-change\">Text processing programs</a> often\n");
	out.print  ("<strong>modify user input</strong> e.g. so that when you have typed the three\n");
	out.print  ("characters \"(\", \"c\", and \")\", the program changes,\n");
	out.print  ("both internally and visibly,\n");
	out.print  ("that string to the single\n");
	out.print  ("character&nbsp;\"&copy;\". This is often convenient, especially if you can add\n");
	out.print  ("your own rules for modifications, but it causes unpleasant surprises and\n");
	out.print  ("problems when you actually meant what you wrote, e.g. wanted to write letter\n");
	out.print  ("\"c\" in parentheses.\n");
	out.print  ("  <li> <a name=\"keycomb\">Programs often process\n");
	out.print  ("       some <strong>keyboard key combinations</strong></a>,\n");
	out.print  ("typically involving the use of an\n");
	out.print  ("<a href=\"#altkeys\">Alt\n");
	out.print  ("       or\n");
	out.print  ("Alt&nbsp;Gr</a>\n");
	out.print  ("key or some other \"composition key\",\n");
	out.print  ("       by converting them to special characters.\n");
	out.print  ("In fact, even the well-known shift key is a composition key: it is\n");
	out.print  ("used to modify the meaning of another key, e.g. by changing a letter\n");
	out.print  ("       to uppercase or turning a digit key to a special character key.\n");
	out.print  ("Such things are not just \"program-specific\"; they also <em>depend\n");
	out.print  ("       on the program version and settings</em> (and on the keyboard,\n");
	out.print  ("       of course), and could well be user-modifiable.\n");
	out.print  ("       For example,\n");
	out.print  ("in order to support the\n");
	out.print  ("       <a href=\"latin9.html#euro\"\n");
	out.print  ("       ><span class=\"charname\">euro sign</span></a>,\n");
	out.print  ("various methods have been developed, e.g.\n");
	out.print  ("by Microsoft\n");
	out.print  ("       so that\n");
	out.print  ("pressing the \"e\" key while keeping the Alt&nbsp;Gr key pressed down\n");
	out.print  ("       might produce the\n");
	out.print  ("<span class=\"charname\">euro sign</span>&nbsp;- in <em>some</em>\n");
	out.print  ("       <a href=\"#encoding\">encoding</a>!\n");
	out.print  ("But this may require a special \"euro update\", and\n");
	out.print  ("<a href=\"http://www.microsoft.com/typography/faq/faq12.htm#kbd\"\n");
	out.print  ("title=\n");
	out.print  ("    \"The euro currency symbol: Accessing the symbol (on Windows systems)\"        \n");
	out.print  ("       >the key combinations vary</a> even when we consider\n");
	out.print  ("       Microsoft products only. So it would be quite inappropriate\n");
	out.print  ("to say e.g. \"to type the euro, use AltGr+e\"\n");
	out.print  ("       as general, unqualified advice.\n");
	out.print  ("</ul>\n");
	out.print  ("\n");
	out.print  ("<p><small><a name=\"altkeys\">The \"Alt\" \n");
	out.print  ("and \"Alt Gr\" keys</a>\n");
	out.print  ("mentioned above are not present on all keyboards, and often they\n");
	out.print  ("both carry the text \"Alt\" but they can be functionally different!\n");
	out.print  ("Typically, those keys are on the left and on the right of the space bar.\n");
	out.print  ("It depends on the physical keyboard what the key cap texts are, and it\n");
	out.print  ("depends on the keyboard settings whether the keys have the same effect\n");
	out.print  ("or different effects.\n");
	out.print  ("The name \"Alt&nbsp;Gr\" for \"right Alt\"\n");
	out.print  ("is short for \"alternate graphic\", and it's mostly used to create\n");
	out.print  ("additional characters, whereas (left) \"Alt\" \n");
	out.print  ("is typically used for keyboard access\n");
	out.print  ("to menus.</small></p>\n");
	out.print  ("\n");
	out.print  ("<p>The last method above could often be called \"device dependent\"\n");
	out.print  ("rather than program specific, since\n");
	out.print  ("the program that performs the conversion might be a keyboard\n");
	out.print  ("       <a href=\"http://webopedia.internet.com/TERM/d/driver.html\">driver</a>.\n");
	out.print  ("In that case, normal programs would have all their input from\n");
	out.print  ("the keyboard processed that way.\n");
	out.print  ("This method may also involve the use of auxiliary keys\n");
	out.print  ("for typing characters with <strong><a\n");
	out.print  ("href=\"#diacritic\">diacritic marks</a></strong>\n");
	out.print  ("such as&nbsp;\"<a\n");
	out.print  ("href=\"latin1/3.html#E1\"\n");
	out.print  ("title=\"letter a with acute accent\"\n");
	out.print  (">á</a>\".\n");
	out.print  ("Such an auxiliary key is often called <dfn>dead key</dfn>, since\n");
	out.print  ("just pressing it causes nothing; it works only in combination with\n");
	out.print  ("some other key. A more official name for a dead key is\n");
	out.print  ("<dfn>modifier key</dfn>.\n");
	out.print  ("For example, depending on the keyboard and the driver,\n");
	out.print  ("you <em>might</em> be able to produce \"á\" by pressing first a key labeled\n");
	out.print  ("with the acute accent&nbsp;(´), then the \"a\" key.</p>\n");
	out.print  ("<p><small><em>My</em> keyboard has two keys for such purposes.\n");
	out.print  ("There's the accent key, with the acute accent and\n");
	out.print  ("the grave accent&nbsp;(`) as \"upper case\" character,\n");
	out.print  ("meaning I need to use the\n");
	out.print  ("<a href=\"http://webopedia.internet.com/TERM/S/Shift_key.html\">shift key</a>\n");
	out.print  ("for the grave. And there's a key\n");
	out.print  ("with the dieresis&nbsp;(¨) and the circumflex&nbsp;(^) above it\n");
	out.print  ("(i.e. as \"upper case\")\n");
	out.print  ("and the tilde&nbsp;(~) below or left to it\n");
	out.print  ("(meaning I need to use Alt&nbsp;Gr for it),\n");
	out.print  ("so I can produce <a href=\"#latin1\">ISO Latin&nbsp;1</a> characters\n");
	out.print  ("with those diacritics.\n");
	out.print  ("Note that this does <em>not</em> involve any operation on the\n");
	out.print  ("<em>characters</em> \n");
	out.print  ("<nobr>´`¨^~</nobr>, and the keyboard does not send those\n");
	out.print  ("characters at all in such situations.\n");
	out.print  ("If I try to enter that way a character\n");
	out.print  ("outside the ISO Latin&nbsp;1 repertoire, I get just the diacritic\n");
	out.print  ("as a separate character followed by the normal character,\n");
	out.print  ("e.g. \"^j\". To enter the diacritic itself, such as\n");
	out.print  ("the <a\n");
	out.print  ("href=\"latin1/3.html#7E\">tilde&nbsp;(~)</a>,\n");
	out.print  ("I may need to press the space bar so that the tilde diacritic combines\n");
	out.print  ("with the blank (producing&nbsp;~) instead of a letter\n");
	out.print  ("(producing e.g.&nbsp;\"ã\"). Your situation may well be different,\n");
	out.print  ("in part or entirely. For example, a typical French keyboard\n");
	out.print  ("has separate keys for those accented letters\n");
	out.print  ("that are used in French (e.g.&nbsp;\"à\"), but the accents themselves\n");
	out.print  ("can be difficult to produce. You might need to type AltGr&nbsp;è followed\n");
	out.print  ("by a space to produce the grave accent `.</small></p>\n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"esc\">\"Escape\" notations (\"meta notations\") for characters</a></h3>\n");
	out.print  ("<P>It is often possible to use various\n");
	out.print  ("<dfn>\"escape\" notations</dfn> for\n");
	out.print  ("characters. This rather vague term means notations which are\n");
	out.print  ("<em>afterwards</em>\n");
	out.print  ("converted to\n");
	out.print  ("(or just displayed as)\n");
	out.print  ("characters according to some specific rules by some\n");
	out.print  ("programs. They depend on the markup, programming, or other\n");
	out.print  ("<em>language</em> (in a broad but technical meaning for \"language\",\n");
	out.print  ("so that data formats can be included but human languages are excluded).\n");
	out.print  ("If different languages have similar conventions in this respect, a language\n");
	out.print  ("designer may have picked up a notation from an existing language,\n");
	out.print  ("or it might be a coincidence.</p>\n");
	out.print  ("\n");
	out.print  ("<p>The phrase \"escape notations\" or even \"escapes\" for short is rather\n");
	out.print  ("widespread, and it reflects the general idea of escaping from the limitations\n");
	out.print  ("of a character repertoire or device or protocol or something else.\n");
	out.print  ("So it's used here, although a name like\n");
	out.print  ("<dfn>meta notations</dfn> might be better. It is any case essential to\n");
	out.print  ("distinguish these notations from the use of the\n");
	out.print  ("ESC (escape) <a href=\"#control\">control code</a>\n");
	out.print  ("in <a href=\"#ascii\">ASCII</a> and other character codes.</p>\n");
	out.print  ("\n");
	out.print  ("<p>Examples:\n");
	out.print  ("<ul>\n");
	out.print  ("  <li> In the\n");
	out.print  ("<a href=\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=postscript\"\n");
	out.print  ("       >PostScript</a> language,\n");
	out.print  ("characters have <em>names</em>,\n");
	out.print  ("such as <code>Adieresis</code> for&nbsp;<a\n");
	out.print  ("       href=\"latin1/3.html#C4\"\n");
	out.print  ("title=\"capital a with diaeresis\"\n");
	out.print  ("     >Ä</a>,\n");
	out.print  ("       which can be used to denote them\n");
	out.print  ("       according to certain rules.\n");
	out.print  ("  <li> In the <a href=\n");
	out.print  ("\"http://webopedia.internet.com/TERM/r/rich_text_format_RTF.html\"\n");
	out.print  ("title=\"Rich Text Format\"\n");
	out.print  ("       >RTF</a> data format, the notation <code>\\'c4</code> is used to\n");
	out.print  ("       denote&nbsp;<a\n");
	out.print  ("       href=\"latin1/3.html#C4\"\n");
	out.print  ("title=\"capital a with diaeresis\"\n");
	out.print  ("     >Ä</a>.\n");
	out.print  ("  <li> In <a href=\"http://www.tex.ac.uk/cgi-bin/texfaq2html\"\n");
	out.print  ("title=\"TeX Frequently Asked Questions\"\n");
	out.print  ("       >TeX</a>\n");
	out.print  ("       systems, there are different ways of producing characters,\n");
	out.print  ("possibly depending on the \"packages\" used. Examples of\n");
	out.print  ("       ways to produce&nbsp;<a\n");
	out.print  ("       href=\"latin1/3.html#C4\"\n");
	out.print  ("title=\"capital a with diaeresis\"\n");
	out.print  ("     >Ä</a>: <code>\\\"A</code>, <code>\\symbol{196}</code>,\n");
	out.print  ("<code>\\char'0304</code>,\n");
	out.print  ("       <code>\\capitaldieresis{A}</code>\n");
	out.print  ("(for a large list, consult \n");
	out.print  ("<cite><a href=\"http://www.ctan.org/tex-archive/info/symbols/comprehensive/\"\n");
	out.print  ("title=\n");
	out.print  ("\"The CTAN info/symbols/comprehensive/ directory\"\n");
	out.print  (">The Comprehensive LaTex Symbol List</a></cite>\n");
	out.print  ("\n");
	out.print  ("<li><a name=\"ent\">In</a> the <a title=\"A primer on HTML, by Jukka Korpela\"\n");
	out.print  ("href=\"html-primer.html\">HTML language</a> one can use the\n");
	out.print  ("notation <code>&amp;Auml;</code>\n");
	out.print  ("for\n");
	out.print  ("<a href=\"latin1/3.html#C4\"\n");
	out.print  ("title=\"capital a with diaeresis\"\n");
	out.print  ("     >the character&nbsp;Ä</a>.\n");
	out.print  ("       In the official HTML terminology,\n");
	out.print  ("     such notations are called\n");
	out.print  ("<a href=\"http://www.htmlhelp.org/reference/html40/entities/\"\n");
	out.print  ("title=\"Entities (in HTML 4.0 Reference)\"\n");
	out.print  ("     >entity references (denoting characters)</a>.\n");
	out.print  ("It depends on HTML version which entities are defined,\n");
	out.print  ("       and it depends on a browser\n");
	out.print  ("<a href=\"http://www.htmlhelp.com/faq/html/all.html#entity-or-number\"\n");
	out.print  ("       >which entities are actually supported</a>.\n");
	out.print  ("  <li><a name=\"numref\">In HTML,\n");
	out.print  ("one can also use the notation\n");
	out.print  ("<code>&amp;#196;</code></a> for\n");
	out.print  ("<a href=\"latin1/3.html#C4\"\n");
	out.print  ("title=\"capital a with diaeresis\"\n");
	out.print  ("     >the character&nbsp;Ä</a>.\n");
	out.print  ("Generally, in any\n");
	out.print  ("<a href=\"HTML3.2/2.3.html#sgml\">SGML</a>\n");
	out.print  ("based system,\n");
	out.print  ("or \"SGML application\" as the jargon goes,\n");
	out.print  ("       a <dfn>numeric character reference</dfn>\n");
	out.print  ("(or, actually, just\n");
	out.print  ("<dfn><a href=\"chars/ref.html\">character references</a></dfn>)\n");
	out.print  ("of the form <code>&amp;#</code><var>number</var><code>;</code> can be used,\n");
	out.print  ("       and it refers to the character which is in\n");
	out.print  ("code position <var>n</var> in the\n");
	out.print  ("       <a href=\"#code\">character code</a> defined for the \"SGML\n");
	out.print  ("       application\" in question.\n");
	out.print  ("This is actually very simple: you specify a character by its index (position, number).\n");
	out.print  ("       But in SGML terminology, the character\n");
	out.print  ("       code which determines the interpretation of\n");
	out.print  ("<code>&amp;#</code><var>number</var><code>;</code> is called, quite confusingly,\n");
	out.print  ("       the <dfn>document character set.</dfn>\n");
	out.print  ("       For HTML, the \"document character set\" is\n");
	out.print  ("<a href=\"#10646\">ISO 10646</a> (or, to be exact, a subset thereof,\n");
	out.print  ("depending on HTML version).\n");
	out.print  ("A most essential point is that\n");
	out.print  ("for HTML,\n");
	out.print  ("the \"document character set\" is completely independent\n");
	out.print  ("       of the <a href=\"#encoding\">encoding</a> of the document!\n");
	out.print  ("(See <A HREF=\"http://www.alanflavell.org.uk//\"\n");
	out.print  ("       >Alan&nbsp;J.&nbsp;Flavell</a>'s\n");
	out.print  ("<a href=\"http://www.alanflavell.org.uk//charset/internat.html\"\n");
	out.print  ("       ><cite>Notes on Internationalization</cite></a>.) The so-called\n");
	out.print  ("<dfn><a href=\"http://www.htmlhelp.com/reference/html40/entities/\"\n");
	out.print  (">character entity references</a></dfn>\n");
	out.print  ("like <code>&amp;Auml;</code> in HTML can be regarded as symbolic names defined\n");
	out.print  ("for some numeric character references.\n");
	out.print  ("In XML, character references use ISO&nbsp;10646 by language definition.\n");
	out.print  ("Although both entity and character references are <em>markup</em>,\n");
	out.print  ("to be used in markup languages, they often replaced by the corresponding\n");
	out.print  ("characters, when a user types text on an Internet discussion forum. \n");
	out.print  ("This might be a conscious decision by the forum designer, but quite\n");
	out.print  ("often it is caused unintentionally.\n");
	out.print  ("<li>In <a href=\n");
	out.print  ("\"styles/howto.html\" title=\n");
	out.print  ("\"How to write style sheets (CSS) - basic resources\">\n");
	out.print  ("<abbr title=\"Cascading Style Sheets\">CSS</abbr></a>,\n");
	out.print  ("you can present a character as <code>\\</code><var>n</var><code>&nbsp;</code>,\n");
	out.print  ("where <var>n</var> is the Unicode code position in hexadecimal.\n");
	out.print  ("  <li> In the\n");
	out.print  ("<a title=\"comp.lang.c Frequently Asked Questions\" \n");
	out.print  ("href=\"http://www.eskimo.com/%7escs/C-faq/top.html\">C programming language</a>,\n");
	out.print  ("one can usually write <code>\\0304</code> to denote Ä within a string\n");
	out.print  ("constant, although this makes the program character code dependent.\n");
	out.print  ("</ul>\n");
	out.print  ("\n");
	out.print  ("<p>As you can see, the notations typically involve\n");
	out.print  ("some (semi-)mnemonic name or the <a href=\"#code\">code number</a>\n");
	out.print  ("of the character, in some\n");
	out.print  ("<a href=\"http://www.cstc.org/data/resources/60/convexp.html\"\n");
	out.print  (">number system</a>.\n");
	out.print  ("(The <a href=\"#latin\">ISO 8859-1</a> code number for our\n");
	out.print  ("example character\n");
	out.print  ("<a href=\"latin1/3.html#C4\"\n");
	out.print  ("title=\"capital a with diaeresis\"\n");
	out.print  ("     >Ä</a> is 196 in decimal, 304 in octal, C4 in hexadecimal.)\n");
	out.print  ("And there is some method of indicating that the letters or digits\n");
	out.print  ("are not to be taken as such but as part of a special notation\n");
	out.print  ("denoting a character. Often some specific character such as\n");
	out.print  ("the <a href=\"latin1/3.html#5C\"\n");
	out.print  (">backslash&nbsp;\\</a> is used as an \"escape character\".\n");
	out.print  ("This implies that such a character cannot be used as such in the\n");
	out.print  ("language or format but must itself be \"escaped\"; for example,\n");
	out.print  ("to include the backslash itself into\n");
	out.print  ("a string constant in C, you need to write it twice&nbsp;(<code>\\\\</code>).</p>\n");
	out.print  ("\n");
	out.print  ("<p>In cases like these, the character itself does not occur in a file\n");
	out.print  ("(such as an HTML document or a C source program). Instead, the file\n");
	out.print  ("contains the \"escape\" notation as a character sequence, which will\n");
	out.print  ("then be <em>interpreted</em> in a specific way by programs like\n");
	out.print  ("a Web browser or a C compiler.\n");
	out.print  ("One can in a sense regard the \"escape notations\"\n");
	out.print  ("as <a href=\"#encoding\">encodings</a> used in specific contexts upon\n");
	out.print  ("specific agreements.</p>\n");
	out.print  ("\n");
	out.print  ("<p><a name=\"human-escape\">There are also \"escape notations\"\n");
	out.print  ("which are to be interpreted by\n");
	out.print  ("<strong>human readers</strong> directly.</a>\n");
	out.print  ("For example, when sending\n");
	out.print  ("E-mail one might use A\" (letter A followed by a quotation mark)\n");
	out.print  ("as a surrogate for Ä (letter A with dieresis), <em>or</em> one\n");
	out.print  ("might use AE instead of Ä. The reader is assumed to understand that\n");
	out.print  ("e.g. A\" on display actually means&nbsp;Ä.\n");
	out.print  ("Quite often the purpose is to use <a href=\"#ascii\">ASCII</a>\n");
	out.print  ("characters only, so that the typing, transmission, and display\n");
	out.print  ("of the characters is \"safe\".\n");
	out.print  ("But this typically means that text becomes very messy; the Finnish\n");
	out.print  ("word <i>Hämäläinen</i> does not look too good or readable when written as\n");
	out.print  ("<i>Ha\"ma\"la\"inen</i> or <i>Haemaelaeinen</i>.\n");
	out.print  ("Such usage is based on special (though often implicit) conventions\n");
	out.print  ("and can cause a lot of confusion when there is no mutual agreement\n");
	out.print  ("on the conventions, especially because there are so many of them.\n");
	out.print  ("(For example, to denote letter a with acute accent, á, a convention might\n");
	out.print  ("use the apostrophe, a', or the solidus, a/, or the acute accent,\n");
	out.print  ("a´, or something else.) \n");
	out.print  ("</p>\n");
	out.print  ("\n");
	out.print  ("<p><a name=\"rfc1345\">There is an old proposal by K. Simonsen,</a>\n");
	out.print  ("<cite>Character Mnemonics&nbsp;&amp; Character Sets</cite>,\n");
	out.print  ("published as <a href=\"http://www.isi.edu/in-notes/rfc1345.txt\"\n");
	out.print  (">RFC&nbsp;1345</a>, which lists a large number of \"escape notations\"\n");
	out.print  ("for characters.\n");
	out.print  ("They are very short, typically two characters, e.g.\n");
	out.print  ("A: for Ä and th for þ&nbsp;(thorn).\n");
	out.print  ("Naturally there's the problem that the reader must know whether e.g.\n");
	out.print  ("th is to be understood that way or as two letters t&nbsp;and&nbsp;h.\n");
	out.print  ("So the system is primarily for <em>referring to</em> characters (see\n");
	out.print  ("below), but under suitable circumstances it could also be used\n");
	out.print  ("for actually writing texts, when the ambiguities can somehow be\n");
	out.print  ("removed by additional conventions or by context.\n");
	out.print  ("RFC&nbsp;1345 cannot be regarded as official or widely known,\n");
	out.print  ("but if you need, for some applications, an \"escape scheme\", you might\n");
	out.print  ("consider using those notations instead of reinventing the wheel.</p> \n");
	out.print  ("\n");
	out.print  ("<h3><a name=\"identify\">How to mention (identify) a character</a></h3>\n");
	out.print  ("\n");
	out.print  ("<p>There are\n");
	out.print  ("also various ways to <strong>identify</strong> a character\n");
	out.print  ("when it cannot be used as such\n");
	out.print  ("or when the appearance of a character is not sufficient identification.\n");
	out.print  ("This might be regarded as a variant of the\n");
	out.print  ("<a href=\"#human-escape\">\"escape notations for human readers\"</a>\n");
	out.print  ("discussed above, but the pragmatic view is different here. We are\n");
	out.print  ("not primarily interested in <em>using</em> characters in running\n");
	out.print  ("text but in <em>specifying</em> which character is being discussed.</p>\n");
	out.print  ("<p>For example, when discussing the\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=0415\">Cyrillic letter that resembles the Latin letter E</a>\n");
	out.print  ("(and may have an identical or very similar\n");
	out.print  ("<a href=\"#glyph\">glyph</a>, and\n");
	out.print  ("is transliterated as E according to\n");
	out.print  ("<a href=\"iso9.html8\">ISO&nbsp;9</a>),\n");
	out.print  ("there are various options:\n");
	out.print  ("<ul>\n");
	out.print  ("<li>\"Cyrillic E\"; this is probably intuitively understandable in this case,\n");
	out.print  ("and can be seen as referring <em>either</em> to the similarity of shape\n");
	out.print  ("     <em>or</em> to the transliteration equivalence; but in the general\n");
	out.print  ("     case these interpretations do not coincide, and the method is\n");
	out.print  ("     otherwise vague too\n");
	out.print  ("<li>\"<code>U+0415</code>\"; this is a unique identification but requires the reader\n");
	out.print  ("     to know the idea of <a href=\"#U\"><code>U</code>+<var>nnnn</var> notations</a>\n");
	out.print  ("<li> \"<span class=\"charname\">cyrillic capital letter ie</span>\"\n");
	out.print  ("     (using the official Unicode <a href=\"#names\">name</a>) or\n");
	out.print  ("     \"cyrillic IE\" (using an abridged version); one problem with this\n");
	out.print  ("     is that the names can be long even if simplified, and they still\n");
	out.print  ("     cannot be assumed to be universally known even by people who\n");
	out.print  ("     recognize the character\n");
	out.print  ("<li> \"KE02\", which uses the special notation system defined in\n");
	out.print  ("     <a href=\"http://www.terena.nl/library/multiling/euroml/section15.html\"\n");
	out.print  ("title=\n");
	out.print  ("\"Character as object, identification methods, ISO 7350\"     \n");
	out.print  ("     >ISO&nbsp;7350</a>; the system uses a compact notation and is\n");
	out.print  ("     marginally mnemonic (K&nbsp;=&nbsp;<i>kirillica</i> 'Cyrillics';\n");
	out.print  ("     the numeric codes indicate small/capital letter variation\n");
	out.print  ("     and the use of <a href=\"#diacritic\">diacritics</a>)\n");
	out.print  ("<li> any of the <a href=\"#esc\">\"escape\" notations</a> discussed above,\n");
	out.print  ("     such as\n");
	out.print  ("\"<code>E=</code>\" by <a href=\"#rfc1345\">RFC&nbsp;1345</a>\n");
	out.print  ("or\n");
	out.print  ("     \"<code>&amp;#1045;</code>\" in HTML;\n");
	out.print  ("    this can be quite adequate in a context where the reader can\n");
	out.print  ("     be assumed to be familiar with the particular notation.\n");
	out.print  ("</ul>\n");
	out.print  ("<H2><A NAME=\"encinfo\">Information about encoding</A></H2>\n");
	out.print  ("\n");
	out.print  ("<H3><A NAME=\"whyencinfo\">The need for information about encoding</A></H3>\n");
	out.print  ("<P>\n");
	out.print  ("It is hopefully obvious from the preceding discussion that\n");
	out.print  ("<EM>a sequence of\n");
	out.print  ("<a href=\"#octet\">octets</a>\n");
	out.print  ("can be interpreted in a multitude of ways</EM>\n");
	out.print  ("when processed as character data.\n");
	out.print  ("By looking at the octet sequence only, you cannot even know\n");
	out.print  ("whether each octet presents one character or just part of\n");
	out.print  ("a two-octet presentation of a character, or something more\n");
	out.print  ("complicated. Sometimes one can guess the encoding, but data\n");
	out.print  ("processing and transfer shouldn't be guesswork.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Naturally, a sequence of octets could be intended to present other than\n");
	out.print  ("character data, too. It could be an image in a bitmap format, or\n");
	out.print  ("a computer program in binary form, or numeric data in the internal\n");
	out.print  ("format used in computers.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("This problem can be handled in different ways in different systems\n");
	out.print  ("when data is stored and processed within one computer system.\n");
	out.print  ("For <EM>data transmission</EM>, a platform-independent method\n");
	out.print  ("of specifying the general format and\n");
	out.print  ("the encoding and other relevant information is needed.\n");
	out.print  ("Such methods exist, although they are not always used widely enough.\n");
	out.print  ("People still send each other data without specifying the encoding,\n");
	out.print  ("and this may cause a lot of harm. Attaching a human-readable note,\n");
	out.print  ("such as a few words of explanation in an E-mail message body,\n");
	out.print  ("is better than nothing. But since data is processed by programs which\n");
	out.print  ("cannot understand such notes, the encoding should be specified\n");
	out.print  ("in a standardized computer-readable form.\n");
	out.print  ("</P>\n");
	out.print  ("<H3><A NAME=\"MIME\">The MIME solution</A></H3>\n");
	out.print  ("<h4><a name=\"mediatypes\">Media types</a></h4>\n");
	out.print  ("<p><DFN>Internet media types</DFN>,\n");
	out.print  ("often called\n");
	out.print  ("<DFN>MIME media types</DFN>,\n");
	out.print  ("can be used to specify a\n");
	out.print  ("major media type (\"top level media type\", such as <code>text</code>),\n");
	out.print  ("a subtype (such as <code>html</code>), and an encoding\n");
	out.print  ("(such as\n");
	out.print  ("<code><a href=\"#latin1\">iso-8859-1</a></code>).\n");
	out.print  ("They were originally developed to allow sending other\n");
	out.print  ("than plain <A HREF=\"#ascii\">ASCII</A> data by E-mail.\n");
	out.print  ("They can be (and should be) used for specifying the encoding when\n");
	out.print  ("data is sent over a network, e.g. by E-mail or using the\n");
	out.print  ("<A HREF=\"http://www.w3.org/pub/WWW/Protocols/\"\n");
	out.print  ("TITLE=\"Hypertext Transfer Protocol\"\n");
	out.print  (">HTTP</a> protocol\n");
	out.print  ("on the World Wide Web.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("The media type concept is defined in\n");
	out.print  ("<A TITLE=\n");
	out.print  ("\"Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types\"\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://www.oac.uci.edu/indiv/ehood/MIME/2046/rfc2046.html\"\n");
	out.print  (">RFC 2046</A>.\n");
	out.print  ("The procedure for registering types is given in\n");
	out.print  ("<A HREF=\n");
	out.print  ("\"http://www.oac.uci.edu/indiv/ehood/MIME/2048/rfc2048.html\"\n");
	out.print  (">RFC 2048</A>;\n");
	out.print  ("according to\n");
	out.print  ("it, the registry is kept\n");
	out.print  ("by\n");
	out.print  ("<A TITLE=\"Internet Assigned Numbers Authority\"\n");
	out.print  (" HREF=\"http://www.iana.org/\">IANA</A>\n");
	out.print  ("at\n");
	out.print  ("<nobr>\n");
	out.print  ("<code>ftp://ftp.isi.edu/in-notes/iana/assignments/media-types/</code></nobr>\n");
	out.print  ("but it has in fact been moved to\n");
	out.print  ("<nobr><code><a href=\"http://www.iana.org/assignments/media-types/\"\n");
	out.print  (">http://www.iana.org/assignments/media-types/</a></code></nobr>\n");
	out.print  ("\n");
	out.print  ("<!--\n");
	out.print  ("<br>but you can also access it via<br>\n");
	out.print  ("<nobr><a href=\"http://www.isi.edu/in-notes/iana/assignments/media-types/\"><code>http://www.isi.edu/in-notes/iana/assignments/media-types/</code></a></nobr>\n");
	out.print  ("-->\n");
	out.print  ("</p>\n");
	out.print  ("\n");
	out.print  ("<h4><a name=\"charset\">Character encoding (\"charset\") information</a></h4>\n");
	out.print  ("<P>\n");
	out.print  ("The technical term used to denote\n");
	out.print  ("a\n");
	out.print  ("<a href=\"#encoding\">character <STRONG>encoding</STRONG></a>\n");
	out.print  ("in the Internet media type context is\n");
	out.print  ("\"character set\", abbreviated \"charset\".\n");
	out.print  ("This has caused a lot of confusion, since \"set\" can easily be\n");
	out.print  ("understood as <a href=\"#repertoire\">repertoire</a>!\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Specifically, when data is sent in MIME format, the media type\n");
	out.print  ("and encoding are specified in a manner illustrated by the\n");
	out.print  ("following example:<BR>\n");
	out.print  ("<CODE>Content-Type: text/html; charset=iso-8859-1</CODE>\n");
	out.print  ("<BR>\n");
	out.print  ("This specifies, in addition to saying that the media type is\n");
	out.print  ("<code>text</code> and subtype is <code>html</code>, that the\n");
	out.print  ("character encoding is <a href=\"#latin1\">ISO 8859-1</a>.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("<A NAME=\"charsetreg\">\n");
	out.print  ("The official registry of \"charset\" (i.e., character encoding)\n");
	out.print  ("names,</A>\n");
	out.print  ("with references to documents defining their meanings,\n");
	out.print  ("is kept by\n");
	out.print  ("<A TITLE=\"Internet Assigned Numbers Authority\"\n");
	out.print  (" HREF=\"http://www.iana.org/\">IANA</A>\n");
	out.print  ("at<BR>\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.iana.org/assignments/character-sets\"\n");
	out.print  (">\n");
	out.print  ("<code>\n");
	out.print  ("http://www.iana.org/assignments/character-sets\n");
	out.print  ("</code></a>\n");
	out.print  ("<br>(According to the documentation of the registration\n");
	out.print  ("procedure, <a href=\n");
	out.print  ("\"ftp://ftp.isi.edu/in-notes/rfc2978.txt\">RFC&nbsp;2978</a>,\n");
	out.print  ("it should be elsewhere, but it has been moved.)\n");
	out.print  ("I have composed a\n");
	out.print  ("<a href=\"chars/sorted.html\">tabular presentation of the registry</a>, ordered alphabetically\n");
	out.print  ("by \"charset\" name and accompanied with some hypertext references.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Several character encodings have alternate (alias) names in the registry.\n");
	out.print  ("For example, the basic (ISO 646) variant of\n");
	out.print  ("<A HREF=\"#ascii\">ASCII</A> can be called \"ASCII\" or\n");
	out.print  ("\"ANSI_X3.4-1968\" or \"cp367\" (plus a few other names);\n");
	out.print  ("the preferred name in <a href=\"#MIME\">MIME</a> context is,\n");
	out.print  ("according to the registry, \"US-ASCII\".\n");
	out.print  ("Similarly, <a href=\"#latin1\">ISO 8859-1</a> has several names,\n");
	out.print  ("the preferred MIME name being \"ISO-8859-1\".\n");
	out.print  ("The \"native\" encoding for Unicode,\n");
	out.print  ("<a href=\"#ucs2\">UCS-2</a>,\n");
	out.print  ("is named \"ISO-10646-UCS-2\" there.</p>\n");
	out.print  ("<h4><A NAME=\"headers\">MIME headers</a></h4>\n");
	out.print  ("<p>The <code>Content-Type</code> information\n");
	out.print  ("is an example of information in a\n");
	out.print  ("<DFN>header</DFN>.\n");
	out.print  ("Headers relate to some data, describing its presentation and\n");
	out.print  ("other things, but are passed as logically separate from it.\n");
	out.print  ("Possible headers and their contents are defined in\n");
	out.print  ("<strong>the basic MIME specification</strong>,\n");
	out.print  ("<A TITLE=\n");
	out.print  ("\"Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies\"\n");
	out.print  ("HREF=\"ftp://nis.nsf.net/internet/documents/rfc/rfc2045.txt\">\n");
	out.print  ("RFC 2045</A>.\n");
	out.print  ("Adequate headers\n");
	out.print  ("should normally be\n");
	out.print  ("generated automatically by the software which sends the data\n");
	out.print  ("(such as a program\n");
	out.print  ("for sending E-mail,\n");
	out.print  ("or a Web server) and interpreted automatically\n");
	out.print  ("by receiving software (such as a program for reading E-mail,\n");
	out.print  ("or a Web browser).\n");
	out.print  ("In E-mail messages, headers precede the message body; it depends\n");
	out.print  ("on the E-mail program whether and how it displays the headers.\n");
	out.print  ("For Web documents, a Web server is required to send headers when\n");
	out.print  ("it delivers a document to a browser (or other user agent) which\n");
	out.print  ("has sent a request for the document.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<p>In addition to media types and character encodings, MIME addresses several\n");
	out.print  ("other aspects too. <a href=\"http://www.oac.uci.edu/indiv/ehood/\">Earl Hood</a> has\n");
	out.print  ("composed the documentation\n");
	out.print  ("<cite><a href=\"http://www.oac.uci.edu/indiv/ehood/MIME/MIME.html\"\n");
	out.print  (">Multipurpose Internet Mail Extensions MIME</a></cite>, which contains\n");
	out.print  ("the basic RFCs on MIME in hypertext format and\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.oac.uci.edu/indiv/ehood/MIME/toc.html\"\n");
	out.print  ("title=\"(MIME) Table of Contents\"\n");
	out.print  (">a&nbsp;common table of contents</a> for them.</p>\n");
	out.print  ("\n");
	out.print  ("<h3><A NAME=\"qp\">An auxiliary encoding: Quoted-Printable (QP)</A></h3>\n");
	out.print  ("<P>\n");
	out.print  ("<A HREF=\"#MIME\">The MIME specification</A>\n");
	out.print  ("defines, among many other things,\n");
	out.print  ("the general purpose\n");
	out.print  ("\"Quoted-Printable\" (QP)\n");
	out.print  ("<a href=\"#encoding\">encoding</a> which can be used to present any\n");
	out.print  ("sequence of\n");
	out.print  ("<a href=\"#octet\">octets</a>\n");
	out.print  ("as a sequence of such octets which correspond\n");
	out.print  ("to <a href=\"#ascii\">ASCII</a> characters.\n");
	out.print  ("This implies that the sequence of octets becomes longer, and\n");
	out.print  ("if it is read as an ASCII string, it can be incomprehensible to\n");
	out.print  ("humans. But what is gained is robustness in data transfer, since\n");
	out.print  ("the encoding uses only \"safe\" ASCII characters which will most\n");
	out.print  ("probably get through any component in the transfer unmodified.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Basically, QP encoding means that\n");
	out.print  ("most octets smaller than 128 are used as such, whereas\n");
	out.print  ("larger octets and some of the small ones are presented as follows:\n");
	out.print  ("octet <var>n</var> is presented as\n");
	out.print  ("a sequence of three octets, corresponding to ASCII codes for\n");
	out.print  ("the <code>=</code> sign and the two digits of the\n");
	out.print  ("<A TITLE=\n");
	out.print  ("'description of hexadecimal notation (in Free On-line Dictionary of Computing)'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=hexadecimal&amp;action=Search\"\n");
	out.print  (">hexadecimal</A>\n");
	out.print  ("notation of <var>n</var>.\n");
	out.print  ("If QP encoding is applied to a sequence of octets presenting\n");
	out.print  ("character data according to <a href=\"#latin1\">ISO 8859-1</a>\n");
	out.print  ("character code, then\n");
	out.print  ("effectively this means that most ASCII characters\n");
	out.print  ("(including all ASCII letters)\n");
	out.print  ("are preserved as such whereas e.g. the ISO 8859-1 character\n");
	out.print  ("<a href=\"latin1/3.html#E4\"\n");
	out.print  ("title=\"SMALL LETTER A WITH DIAERESIS\"\n");
	out.print  (">ä</a>\n");
	out.print  ("(code position 228 in decimal, E4 in hexadecimal)\n");
	out.print  ("is encoded as <code>=E4</code>.\n");
	out.print  ("(For obvious reasons, the equals sign <code>=</code> itself is among\n");
	out.print  ("the few ASCII characters which are encoded. Being in code position\n");
	out.print  ("61 in decimal, 3D in hexadecimal, it is encoded as <code>=3D</code>.)\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Notice that encoding ISO 8859-1 data this way means that the\n");
	out.print  ("<em>character code</em> is the one specified by the ISO 8859-1 standard,\n");
	out.print  ("whereas the <em>character encoding</em> is different from the one\n");
	out.print  ("specified (or at least suggested) in that standard.\n");
	out.print  ("Since QP only specifies the mapping of a sequence of octets to\n");
	out.print  ("another sequence of octets, it is a pure encoding and can be applied\n");
	out.print  ("to any character data, or to any data for that matter.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Naturally, Quoted-Printable encoding needs to be processed\n");
	out.print  ("by a program which knows it and can convert it to human-readable form.\n");
	out.print  ("It looks rather confusing when displayed as such.\n");
	out.print  ("Roughly speaking, one can expect most <em>E-mail</em> programs to\n");
	out.print  ("be able to handle QP, but the same does not apply to\n");
	out.print  ("<em>newsreaders</em> (or Web browsers).\n");
	out.print  ("Therefore, you should normally use QP in E-mail only.\n");
	out.print  ("</P>\n");
	out.print  ("<H3><A NAME=\"MIMEatwork\">How MIME should work in practice</A></H3>\n");
	out.print  ("<P>\n");
	out.print  ("Basically, MIME should let people communicate smoothly\n");
	out.print  ("without hindrances caused by character code and encoding\n");
	out.print  ("differences. MIME should handle the necessary conversions\n");
	out.print  ("automatically and invisibly.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("For example, when person <var>A</var> sends E-mail\n");
	out.print  ("to person <var>B</var>, the following should happen:\n");
	out.print  ("The E-mail program used by <var>A</var> encodes <var>A</var>'s message in\n");
	out.print  ("some particular manner, probably according to some\n");
	out.print  ("convention which is normal on the system where the program is used\n");
	out.print  ("(such as\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>\n");
	out.print  ("encoding on a typical modern Unix system).\n");
	out.print  ("The program automatically includes information about this\n");
	out.print  ("encoding into an E-mail header, which is usually invisible both\n");
	out.print  ("when sending and when reading the message.\n");
	out.print  ("The message, with the headers, is then delivered, through network\n");
	out.print  ("connections, to <var>B</var>'s system. When\n");
	out.print  ("<var>B</var> uses his\n");
	out.print  ("E-mail program (which may be very different from <var>A</var>'s)\n");
	out.print  ("to read the message, the program should automatically pick up\n");
	out.print  ("the information about the encoding as specified in a header\n");
	out.print  ("and interpret the message body according to it.\n");
	out.print  ("For example, if <var>B</var> is using a Macintosh computer, the\n");
	out.print  ("program would automatically convert the message into\n");
	out.print  ("<A HREF=\"#maccode\">Mac's internal character encoding</A>\n");
	out.print  ("and only then display it.\n");
	out.print  ("Thus, if the message was\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>\n");
	out.print  ("encoded and contained\n");
	out.print  ("the Ä (upper case A with dieresis) character, encoded as octet 196,\n");
	out.print  ("the E-mail program used on the Mac should use a conversion table\n");
	out.print  ("to map this to octet 128, which is the encoding for Ä on Mac.\n");
	out.print  ("(If the program fails to do such a conversion, strange things will\n");
	out.print  ("happen. <A HREF=\"#ascii\">ASCII</A> characters would be displayed correctly, since they\n");
	out.print  ("have the same codes in both encodings, but instead of Ä, the\n");
	out.print  ("character corresponding to octet 196 in Mac encoding would appear - a\n");
	out.print  ("symbol which looks like <I>f</I> in italics.)\n");
	out.print  ("</P>\n");
	out.print  ("<H3><A NAME=\"problems\">Problems with implementations - examples</A></H3>\n");
	out.print  ("<P>\n");
	out.print  ("Unfortunately, there are deficiencies and errors \n");
	out.print  ("in software so that <EM>users</EM> often have to struggle\n");
	out.print  ("with character code conversion\n");
	out.print  ("problems, perhaps correcting the actions taken\n");
	out.print  ("by programs.\n");
	out.print  ("It takes two to tango, and some more participants to get\n");
	out.print  ("characters right.\n");
	out.print  ("This section demonstrates different things\n");
	out.print  ("which may happen, and do happen, when just <em>one</em> component\n");
	out.print  ("is faulty, i.e. when <strong>MIME is not used or is inadequately\n");
	out.print  ("supported by some \"partner\"</strong> (software involved\n");
	out.print  ("in entering, storing, transferring, and displaying character data).   \n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("A typical minor (!) problem which may occur in communication\n");
	out.print  ("in Western European languages other than English is that\n");
	out.print  ("most characters get interpreted and displayed correctly but\n");
	out.print  ("some \"national letters\" don't.\n");
	out.print  ("For example,\n");
	out.print  ("the character repertoire needed in\n");
	out.print  ("German, Swedish, and Finnish is essentially <A HREF=\"#ascii\">ASCII</A> plus a few\n");
	out.print  ("letters like \"ä\" from the rest of\n");
	out.print  ("<a href=\"#latin1\">ISO Latin&nbsp;1</a>.\n");
	out.print  ("If a text in such a language is processed so that a necessary\n");
	out.print  ("conversion is not applied, or an incorrect conversion is applied,\n");
	out.print  ("the result might be that e.g. the word \"später\" becomes\n");
	out.print  ("\"spter\" or \"spÌter\" or \"spdter\" or \"sp=E4ter\".\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Sometimes you might be able to <em>guess</em> what has happened,\n");
	out.print  ("and perhaps to determine which code conversion should be applied,\n");
	out.print  ("and apply it more or less \"by hand\".\n");
	out.print  ("To take an example (which may have some practical value in itself\n");
	out.print  ("to people using the languages mentioned):\n");
	out.print  ("Assume that you have some text data which is expected to be, say,\n");
	out.print  ("in German, Swedish or Finnish and which appears to be such text\n");
	out.print  ("with some characters replaced by oddities in a somewhat systematic\n");
	out.print  ("way. Locate some words which probably should contain\n");
	out.print  ("<a href=\"latin1/3.html#e4\"\n");
	out.print  ("title=\"small a with diaeresis\"\n");
	out.print  ("     >the letter \"ä\"</a>\n");
	out.print  ("but have\n");
	out.print  ("something strange in place of it (see examples above). Assume\n");
	out.print  ("further that the program you are using interprets text data\n");
	out.print  ("according to\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>\n");
	out.print  ("by default and that the actual data is\n");
	out.print  ("not accompanied with a suitable indication (like a\n");
	out.print  ("<code>Content-Type</code> header) of the encoding, or such an\n");
	out.print  ("indication is obviously in error. Now,\n");
	out.print  ("<strong>looking at what appears instead of \"ä\", we might guess:</strong>\n");
	out.print  ("</P>\n");
	out.print  ("<TABLE BORDER=1 CELLPADDING=\"6\">\n");
	out.print  ("<TR><TH><SAMP>a</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The person who wrote the text assumably just used \"a\" instead of&nbsp;\"ä\",\n");
	out.print  ("probably because he thought that \"ä\" would not get through correctly.\n");
	out.print  ("Although \"ä\" is surely problematic, the cure usually\n");
	out.print  ("is worse than the disease: using \"a\" instead of \"ä\" loses information\n");
	out.print  ("and may change the meanings of words.\n");
	out.print  ("This usage, and the next two usages below,\n");
	out.print  ("is (usually) not directly caused by incorrect implementations but by\n");
	out.print  ("the human writer; however, it is <em>indirectly</em> caused by them.\n");
	out.print  ("</TD>\n");
	out.print  ("</TR>\n");
	out.print  ("<TR><TH><SAMP>ae</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("Similarly to the above-mentioned case, this is usually\n");
	out.print  ("an attempt to avoid writing \"ä\". For some languages (e.g. German),\n");
	out.print  ("using \"ae\" as a surrogate for \"ä\" works to some extent, but it\n");
	out.print  ("is much less applicable to Swedish or Finnish - and loses information,\n");
	out.print  ("since the letter pair \"ae\" can genuinely occur in many words.\n");
	out.print  ("</TD>\n");
	out.print  ("</TR>\n");
	out.print  ("<TR><TH><SAMP>a\"</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("Yet another surrogate. It resembles an old (and generally outdated)\n");
	out.print  ("<a href=\"latin1/4.html#diacr\"\n");
	out.print  (">idea of using the quotation mark as a diacritic mark too in ASCII</a>\n");
	out.print  ("but it is probably expected to be understood by humans instead of\n");
	out.print  ("being converted to an \"ä\" by a program.\n");
	out.print  ("</TD>\n");
	out.print  ("</TR>\n");
	out.print  ("<TR><TH><SAMP>d</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The original data was actually\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>\n");
	out.print  ("encoded or\n");
	out.print  ("       something similar (e.g.\n");
	out.print  ("<a href=\"#win\">Windows encoded</a>)\n");
	out.print  ("but during\n");
	out.print  ("       data transfer the most significant\n");
	out.print  ("<A TITLE='definition of \"bit\" in Free On-line Dictionary of Computing'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=bit&amp;action=Search\"\n");
	out.print  (">bit</A>\n");
	out.print  ("of each octet\n");
	out.print  ("was lost.\n");
	out.print  ("(Such things may happen in systems for transferring, or\n");
	out.print  ("\"gatewaying\", data from one network to another.\n");
	out.print  ("Sometimes it might be your <EM>terminal</EM> that has been\n");
	out.print  ("configured to \"mask out\" the most significant bit!)\n");
	out.print  ("This means that the octet representing \"ä\" in\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>,\n");
	out.print  ("i.e. 228,\n");
	out.print  ("became 228 - 128 = 100, which is the ISO 8859-1 encoding of letter d.\n");
	out.print  ("<TR><TH><SAMP>{</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("Obviously, the data is in <A HREF=\"#ascii\">ASCII</A> encoding so that the character\n");
	out.print  ("\"{\" is used in place of \"ä\". Earlier it was common to use various\n");
	out.print  ("<a href=\"#national-ascii\">national variants of ASCII</a>,\n");
	out.print  ("with characters\n");
	out.print  ("#$@[\\]^_`{|}~ replaced by national characters according to the\n");
	out.print  ("needs of a particular language.\n");
	out.print  ("Thus they modified the character repertoire of ASCII by dropping\n");
	out.print  ("out some special characters and introducing national characters\n");
	out.print  ("into their ASCII code positions.\n");
	out.print  ("It requires further study to determine the actual encoding used,\n");
	out.print  ("since e.g. Swedish, German and Finnish ASCII variants all have \"ä\"\n");
	out.print  ("as a replacement for \"{\", but there are differences in other\n");
	out.print  ("replacements.\n");
	out.print  ("<TR><TH><SAMP>Ã¤</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The data is evidently in <a href=\"#utf\">UTF-8</a> encoding.\n");
	out.print  ("Notice that the characters Ã and ¤ stand here for octets\n");
	out.print  ("195 and 164, which might be\n");
	out.print  ("displayed\n");
	out.print  ("differently depending on program\n");
	out.print  ("and device used.\n");
	out.print  ("<TR><TH><SAMP>+AOQ-</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The data is in <a href=\"#utf\">UTF-7</a> encoding.\n");
	out.print  ("<TR><TH><SAMP>Ì</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The data is most probably in\n");
	out.print  ("<A TITLE=\"Roman 8 Character Conversion Table\" HREF=\n");
	out.print  ("\"http://www.robelle.com/library/smugbook/roman8.html\"\n");
	out.print  (">Roman-8</A> encoding\n");
	out.print  ("(defined\n");
	out.print  ("by Hewlett-Packard).\n");
	out.print  ("<TR><TH><SAMP>=E4</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The data is in\n");
	out.print  ("<A HREF=\"#qp\">Quoted-Printable</A> encoding.\n");
	out.print  ("The original encoding, upon which the QP encoding was applied,\n");
	out.print  ("might be <A HREF=\"#latin1\">ISO 8859-1</A>, or any other\n");
	out.print  ("encoding which represents character \"ä\" in the same way as\n");
	out.print  ("ISO 8859-1 (i.e. as octet 228 decimal, E4 hexadecimal).\n");
	out.print  ("<TR><TH><SAMP>&amp;auml;</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The data is in\n");
	out.print  ("<a title=\"A primer on the HTML language\"\n");
	out.print  ("href=\"html-primer.html\">HTML</a> format;\n");
	out.print  ("the encoding may vary.\n");
	out.print  ("The notation <samp>&amp;auml;</samp> is a so-called\n");
	out.print  ("<a href=\"http://www.htmlhelp.com/reference/html40/entities/\"\n");
	out.print  (">character entity reference</a>.\n");
	out.print  ("<TR><TH><SAMP>&amp;#228;</SAMP><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("The data is in\n");
	out.print  ("<a title=\"A primer on the HTML language\"\n");
	out.print  ("href=\"html-primer.html\">HTML</a> format;\n");
	out.print  ("the encoding may vary.\n");
	out.print  ("The notation <samp>&amp;#228;</samp> is a so-called\n");
	out.print  ("<a href=\"http://www.htmlhelp.com/reference/html40/entities/\"\n");
	out.print  (">numeric character reference</a>.\n");
	out.print  ("(Notice that 228 is the\n");
	out.print  ("<a href=\"#code\">code position</a>\n");
	out.print  ("for &auml; in\n");
	out.print  ("<a href=\"#10646\">Unicode</a>.)\n");
	out.print  ("<TR><TH>&#8240; (per mille sign, <sup>0</sup>/<sub>00</sub>)<BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("This character\n");
	out.print  ("occupies code position 228 in the <a href=\"#maccode\">Macintosh\n");
	out.print  ("character code</a>. Thus, what has probably happened is\n");
	out.print  ("that some program has received some ISO 8859-1 encoded data\n");
	out.print  ("and interpreted it as if it were in Mac encoding, then\n");
	out.print  ("performed some conversion based on that interpretation.\n");
	out.print  ("Since\n");
	out.print  ("<span class=\"charname\">\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=2030\">\n");
	out.print  ("per mille sign</a></span>\n");
	out.print  ("is not an ISO 8859-1 character,\n");
	out.print  ("your program is actually not applying ISO 8859-1 interpretation.\n");
	out.print  ("Perhaps an erroneous conversion turned\n");
	out.print  ("228 into 137, which is the code position of the per mille\n");
	out.print  ("sign in the <a href=\"#win\">Windows character code</a>.\n");
	out.print  ("Windows programs usually interpret data according to that code\n");
	out.print  ("even when they are <em>said</em> to apply ISO 8859-1.\n");
	out.print  ("</TD>\n");
	out.print  ("</TR>\n");
	out.print  ("<TR><TH>&#931; (capital sigma)<BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("This character\n");
	out.print  ("occupies code position 228 in <a href=\"#cp\">DOS\n");
	out.print  ("code page</a> 437.\n");
	out.print  ("Since\n");
	out.print  ("<span class=\"charname\">\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=03a3\">greek\n");
	out.print  ("capital letter sigma</a></span>\n");
	out.print  ("is not an ISO 8859-1 character,\n");
	out.print  ("your program is actually not applying ISO 8859-1 interpretation,\n");
	out.print  ("for some reason. Perhaps it is interpreting the data according\n");
	out.print  ("to DOS CP 437, or perhaps the data had been incorrectly\n");
	out.print  ("converted to some encoding where sigma has a presentation.\n");
	out.print  ("</TD>\n");
	out.print  ("</TR>\n");
	out.print  ("<TR><TH><i>nothing</i><BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("Perhaps the data was encoded in\n");
	out.print  ("<A HREF=\"#cp\">DOS encoding</A>\n");
	out.print  ("(e.g. code page 850),\n");
	out.print  ("where the code for \"ä\" is 132. In\n");
	out.print  ("<a href=\"#latin1\">ISO 8859-1</a>,\n");
	out.print  ("octet 132 is in the\n");
	out.print  ("area reserved for\n");
	out.print  ("<a href=\"#control\">control characters</a>;\n");
	out.print  ("typically such octets are\n");
	out.print  ("not displayed at all, or perhaps displayed as blank.\n");
	out.print  ("If you can access the data in binary form,\n");
	out.print  ("you could find evidence\n");
	out.print  ("for this hypothesis by noticing that octets 132 actually appear there.\n");
	out.print  ("(For instance, the\n");
	out.print  ("       <a href=\"emacs-iso.html\"\n");
	out.print  ("title=\"GNU Emacs Manual\"\n");
	out.print  ("       >Emacs</a>\n");
	out.print  ("editor would display such an octet as\n");
	out.print  ("<samp>\\204</samp>, since 204 is the\n");
	out.print  ("<A TITLE=\n");
	out.print  ("'description of octal notation (in Free On-line Dictionary of Computing)'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=octal&amp;action=Search\"\n");
	out.print  (">octal</A>\n");
	out.print  ("notation for 132.)\n");
	out.print  ("If, on the other hand, it's not octet 132 but octet 138, then\n");
	out.print  ("the data is most probably in\n");
	out.print  ("<A HREF=\"#maccode\">Macintosh encoding</A>.\n");
	out.print  ("<TR><TH>&#8222; (double low-9 quotation mark)<BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("Most probably the data was encoded in\n");
	out.print  ("<A HREF=\"#cp\">DOS encoding</A>\n");
	out.print  ("(e.g. code page 850),\n");
	out.print  ("where the code for \"ä\" is 132. Your program is not actually interpreting\n");
	out.print  ("the data as ISO 8859-1 encoded but according to\n");
	out.print  ("the so-called  <a href=\"#win\">Windows character code</a>,\n");
	out.print  ("where this code position is occupied by the\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=201e\"><span\n");
	out.print  ("class=\"charname\">double low-9\n");
	out.print  ("quotation mark</span></a>.\n");
	out.print  ("</td></tr>\n");
	out.print  ("<TR><TH>&#352; (capital S with caron)<BR></TH>\n");
	out.print  ("<TD>\n");
	out.print  ("Most probably the data was encoded in\n");
	out.print  ("<A HREF=\"#maccode\">Macintosh encoding</A>,\n");
	out.print  ("where the code for \"ä\" is 138. Your program is not actually interpreting\n");
	out.print  ("the data as ISO 8859-1 encoded but according to\n");
	out.print  ("the so-called  <a href=\"#win\">Windows character code</a>,\n");
	out.print  ("where this code position is occupied by the\n");
	out.print  ("<a href=\"http://www.eki.ee/letter/chardata.cgi?ucode=0160\"\n");
	out.print  ("><span class=\"charname\">latin capital letter s with caron</span></a>.\n");
	out.print  ("</td></tr>\n");
	out.print  ("</TABLE>\n");
	out.print  ("\n");
	out.print  ("<P>To illustrate <strong>what may happen\n");
	out.print  ("when text is sent in a grossly invalid form</strong>,\n");
	out.print  ("consider the following example.\n");
	out.print  ("I sent myself E-mail, using Netscape 4.0 (on Windows 95).\n");
	out.print  ("In the mail composition window, I set the encoding to\n");
	out.print  ("<a href=\"#utf\">UTF-8</a>.\n");
	out.print  ("The body of my message was simply<BR>\n");
	out.print  ("Tämä on testi.<BR>\n");
	out.print  ("(That's Finnish for 'This is a test'. The second and fourth\n");
	out.print  ("character is letter a with umlaut.)\n");
	out.print  ("Trying to read the mail on my Unix account, using the Pine E-mail\n");
	out.print  ("program (popular among Unix users), I saw the following\n");
	out.print  ("(when in \"full headers\" mode; irrelevant headers omitted here):\n");
	out.print  ("</P>\n");
	out.print  ("<PRE>\n");
	out.print  ("X-Mailer: Mozilla 4.0 [en] (Win95; I)\n");
	out.print  ("MIME-Version: 1.0\n");
	out.print  ("To: jkorpela@cs.tut.fi\n");
	out.print  ("Subject: Test\n");
	out.print  ("X-Priority: 3 (Normal)\n");
	out.print  ("Content-Type: text/plain; charset=x-UNICODE-2-0-UTF-7\n");
	out.print  ("Content-Transfer-Encoding: 7bit\n");
	out.print  ("\n");
	out.print  ("    [The following text is in the \"x-UNICODE-2-0-UTF-7\" character set]\n");
	out.print  ("    [Your display is set for the \"ISO-8859-1\" character set]\n");
	out.print  ("    [Some characters may be displayed incorrectly]\n");
	out.print  ("\n");
	out.print  ("T+O6Q- on testi.\n");
	out.print  ("</PRE>\n");
	out.print  ("<P>\n");
	out.print  ("Interesting, isn't it?\n");
	out.print  ("I specifically requested\n");
	out.print  ("<A TITLE=\"Information about UTF-8 and UTF-7 encodings\"\n");
	out.print  (" HREF=\"#utf\">UTF-8</a>\n");
	out.print  ("encoding, but Netscape used UTF-7.\n");
	out.print  ("And it did not include a correct header, since\n");
	out.print  ("<code>x-UNICODE-2-0-UTF-7</code> is not a\n");
	out.print  ("<A TITLE=\"Information about charset registration procedure\"\n");
	out.print  ("HREF=\"#charsetreg\">registered \"charset\" name</A>.\n");
	out.print  ("Even if the encoding had been a registered one, there would\n");
	out.print  ("have been no guarantee that my E-mail program would have been\n");
	out.print  ("able to handle the encoding.\n");
	out.print  ("The example, \"T+O6Q-\" instead of \"Tämä\", illustrates what may\n");
	out.print  ("happen when an octet sequence is interpreted according to another\n");
	out.print  ("encoding than the intended one.\n");
	out.print  ("In fact, it is difficult to say what Netscape was really doing,\n");
	out.print  ("since it seems to have encoded incorrectly.\n");
	out.print  ("<P>\n");
	out.print  ("<a name=\"utf7ex\">A <em>correct</em> UTF-7 encoding for \"Tämä\" would be\n");
	out.print  ("\"T+AOQ-m+AOQ-\".</a>\n");
	out.print  ("The \"+\" and \"-\" characters correspond to octets indicating a switch\n");
	out.print  ("to \"shifted encoding\" and back from it.\n");
	out.print  ("The shifted encoding is based on presenting\n");
	out.print  ("<A HREF=\"#10646\">Unicode</A>\n");
	out.print  ("values first\n");
	out.print  ("as 16-bit binary integers, then regrouping the bits and presenting\n");
	out.print  ("the resulting\n");
	out.print  ("six-<A TITLE='definition of \"bit\" in Free On-line Dictionary of Computing'\n");
	out.print  ("HREF=\n");
	out.print  ("\"http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=bit&amp;action=Search\"\n");
	out.print  (">bit</A>\n");
	out.print  ("groups as octets according to a table\n");
	out.print  ("specified in\n");
	out.print  ("<A TITLE=\n");
	out.print  ("\"Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies\"\n");
	out.print  ("HREF=\"ftp://nis.nsf.net/internet/documents/rfc/rfc2045.txt\">\n");
	out.print  ("RFC 2045</A> in the section on Base64.\n");
	out.print  ("See also\n");
	out.print  ("<A TITLE=\"UTF-7 - A Mail-Safe Transformation Format of Unicode\"\n");
	out.print  ("HREF=\"ftp://nis.nsf.net/internet/documents/rfc/rfc2152.txt\">RFC 2152</A>.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<H2><A NAME=\"concl\">Practical conclusions</A></H2>\n");
	out.print  ("<P>\n");
	out.print  ("Whenever text data is sent over a network,\n");
	out.print  ("the sender and the recipient should have a joint\n");
	out.print  ("<strong>agreement on the character encoding</strong>\n");
	out.print  ("used.\n");
	out.print  ("In the optimal case, this is handled by the software automatically,\n");
	out.print  ("but in reality the users need to take some precautions.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("Most importantly, make sure that any\n");
	out.print  ("<strong>Internet-related software</strong>\n");
	out.print  ("that you\n");
	out.print  ("use to send data\n");
	out.print  ("<strong>specifies the encoding</strong>\n");
	out.print  ("correctly in suitable\n");
	out.print  ("headers.\n");
	out.print  ("There are two things involved: the header must be there and it must\n");
	out.print  ("reflect the actual encoding used; and the encoding used must be one\n");
	out.print  ("that is widely understood by the (potential) recipients' software.\n");
	out.print  ("One must often make compromises as regards to the latter aim:\n");
	out.print  ("you may need to use an encoding which is not yet widely supported\n");
	out.print  ("to get your message through at all.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("It is useful to find out how to make\n");
	out.print  ("your Web browser, newsreader, and E-mail program\n");
	out.print  ("so that you can display the encoding information for the page,\n");
	out.print  ("article, or message you are reading.\n");
	out.print  ("(For example, on Netscape use <code>View Page Info</code>;\n");
	out.print  ("on News Xpress, use\n");
	out.print  ("<code>View Raw Format</code>; on Pine, use <code>h</code>.)\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("If you use, say, Netscape to send E-mail or to post\n");
	out.print  ("to Usenet news, make sure it sends the message in a reasonable\n");
	out.print  ("form. In particular,\n");
	out.print  ("<a href=\"http://extra.newsguy.com/%7eschramm/nhtml.html\"\n");
	out.print  ("title=\"Posting News Using HTML (Please don't)\"\n");
	out.print  (">make sure it does not send the message as HTML</a> or duplicate it\n");
	out.print  ("by sending it both as plain text and as HTML (select plain text only).\n");
	out.print  ("As regards to character encoding, make sure it is something\n");
	out.print  ("widely understood, such as <A HREF=\"#ascii\">ASCII</A>, some\n");
	out.print  ("<A TITLE=\"The ISO 8859 Alphabet Soup, with code tables\"\n");
	out.print  ("HREF=\n");
	out.print  ("\"#8859\">ISO 8859</a>\n");
	out.print  ("encoding, or\n");
	out.print  ("<a href=\"#utf\">UTF-8</a>,\n");
	out.print  ("depending on how large character repertoire you need.\n");
	out.print  ("</P>\n");
	out.print  ("<P>\n");
	out.print  ("In particular,\n");
	out.print  ("<strong>avoid sending data in a proprietary encoding</strong>\n");
	out.print  ("(like\n");
	out.print  ("the\n");
	out.print  ("<A HREF=\"#maccode\">Macintosh encoding</A>\n");
	out.print  ("or a\n");
	out.print  ("<A HREF=\"#cp\">DOS encoding</A>)\n");
	out.print  ("to a public\n");
	out.print  ("network. At the very least, if you do that, make sure that the\n");
	out.print  ("message heading specifies the encoding!\n");
	out.print  ("There's nothing wrong with using such an encoding within a single\n");
	out.print  ("computer or in data transfer between similar computers.\n");
	out.print  ("But when sent to Internet, data should be converted to a more\n");
	out.print  ("widely known encoding, by the sending program. If you cannot\n");
	out.print  ("find a way to configure your program to do that, get another program.\n");
	out.print  ("</P>\n");
	out.print  ("<P><SMALL>\n");
	out.print  ("As regards to other forms of transfer of data in digital form,\n");
	out.print  ("such as diskettes, information about encoding is important, too.\n");
	out.print  ("The problem is typically handled by guesswork. Often the crucial\n");
	out.print  ("thing is to know which <em>program</em> was used to generate\n");
	out.print  ("the data, since the text data might be inside a file in, say,\n");
	out.print  ("the MS Word format which can only be read by (a suitable version of)\n");
	out.print  ("MS Word or by a program which knows its internal data format.\n");
	out.print  ("That format, once recognized, might contain information which\n");
	out.print  ("specifies the character encoding used in the text data included;\n");
	out.print  ("or it might not, in which case one has to ask the sender, or make\n");
	out.print  ("a guess, or use trial and error&nbsp;- viewing the data using\n");
	out.print  ("different encodings until something sensible appears.\n");
	out.print  ("</SMALL>\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<H2><A NAME=\"more\">Further reading</A></H2>\n");
	out.print  ("\n");
	out.print  ("<UL>\n");
	out.print  ("<li><a href=\"http://www.joelonsoftware.com/articles/Unicode.html\">\n");
	out.print  ("The Absolute Minimum Every Software Developer Absolutely, Positively Must Know About Unicode and Character Sets</a>\n");
	out.print  ("by Joel on Software. An enjoyable nice treatise, though probably not\n");
	out.print  ("quite the absolute minimum.\n");
	out.print  ("<!--  [LI][a href=\"http://www.ewos.be/tg-cs/gtop.htm\"][cite]Character\n");
	out.print  ("     Sets[/cite][/a] in\n");
	out.print  ("[a href=\"http://www.ewos.be/goss/top.htm\"\n");
	out.print  ("title=\"Guide to Open Systems Specifications\"\n");
	out.print  ("     ][cite]GOSS[/cite][/a]. -->\n");
	out.print  ("  <LI> <a href=\n");
	out.print  ("\"http://developer.apple.com/techpubs/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.9e.html\"\n");
	out.print  ("><cite>Character Encodings Concepts</cite></a>, adapted from a\n");
	out.print  ("     presentation by Peter Edberg at a Unicode conference.\n");
	out.print  ("Old, but a rich source of information, with good illustrations.\n");
	out.print  ("<!--\"http://developer.apple.com/techpubs/mac/TextEncodingCMgr/TECRefBook-140.html\"-->\n");
	out.print  ("<!--\"http://developer.apple.com/techpubs/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.9e.html\"-->\n");
	out.print  ("<LI>\n");
	out.print  ("<a href=\n");
	out.print  ("\"http://www.alanflavell.org.uk//iso8859/iso8859-pointers.html\">\n");
	out.print  ("  <cite>ISO-8859 briefing and resources</cite>\n");
	out.print  ("  </a>\n");
	out.print  ("by\n");
	out.print  ("<A HREF=\"http://www.alanflavell.org.uk//\">Alan&nbsp;J.&nbsp;Flavell</a>.\n");
	out.print  ("Partly a character set tutorial, partly a discussion of specific\n");
	out.print  ("(especially ISO 8859 and HTML related) issues in depth.\n");
	out.print  ("<!--\n");
	out.print  ("<li> <a href=\n");
	out.print  ("\"http://www.stadlar.is/TC304/GUIDE/\"\n");
	out.print  ("     ><cite>Guide to the use of character sets in Europe </cite></a>.\n");
	out.print  ("     A draft which contains explanations of basic concepts\n");
	out.print  ("     related to character sets in general and discusses various\n");
	out.print  ("     European standards.  \n");
	out.print  ("-->\n");
	out.print  ("<li> Section <cite><a href=\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/chars.html\"\n");
	out.print  ("     >Character set standards</a></cite> in the\n");
	out.print  ("     <cite><a href=\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/standards.html\"\n");
	out.print  ("     >Standards and Specifications List</a></cite>\n");
	out.print  ("by     <a href=\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/\">Diffuse</a>\n");
	out.print  ("(archive copy)\n");
	out.print  ("<li><cite><a href=\"http://web.archive.org/web/20030605114512/http://www.diffuse.org/charguide.html\"\n");
	out.print  (">Guide to Character Sets</a></cite>, by Diffuse. (archive copy)\n");
	out.print  ("<LI>     \n");
	out.print  ("<a href=\"http://directory.google.com/\">Google</a>'s \n");
	out.print  ("<a href=\n");
	out.print  ("\"http://directory.google.com/Top/Computers/Software/Globalization/Internationalization/\">section on internalization</a>, which has interesting\n");
	out.print  ("entries like\n");
	out.print  ("<a href=\"http://www.i18ngurus.com/index.html\"\n");
	out.print  ("title=\"Open directory of links to internationalization (i18n) resources and related material.\"\n");
	out.print  (">i18nGurus</a>\n");
	out.print  ("<li> <a href=\"http://www.w3.org/MarkUp/html-spec/charset-harmful.html\">\n");
	out.print  ("<cite>\"Character Set\" Considered Harmful</cite></a>\n");
	out.print  ("by\n");
	out.print  ("<a href=\"http://www.w3.org/People/Connolly/\">Dan Connolly</a>.\n");
	out.print  ("     A good discussion of the basic concepts and misconceptions.\n");
	out.print  ("<li> <a href=\"http://linguist.emich.edu/%7elinglit/routledge/multilingual.html\"\n");
	out.print  ("     ><cite>The Nature of Linguistic Data: Multilingual Computing</cite></a> - an old (1997)\n");
	out.print  ("     collection of annotated links to information on character codes, fonts, etc.\n");
	out.print  ("<li> John Clews:\n");
	out.print  ("     <a href=\n");
	out.print  ("\"http://mirrored.ukoln.ac.uk/lis-journals/dlib/dlib/dlib/march97/sesame/03clews.html\"\n");
	out.print  ("     ><cite>Digital Language Access:\n");
	out.print  ("     Scripts, Transliteration, and Computer Access</cite></a>;\n");
	out.print  ("     an introduction to scripts and transliteration, so it's useful\n");
	out.print  ("     background information for character code issues. \n");
	out.print  ("<li> <a href=\n");
	out.print  ("\"http://www.evertype.com/\"\n");
	out.print  (">Michael Everson's Web site</a>,\n");
	out.print  ("     which contains a lot of links to detailed documents on character\n");
	out.print  ("     code issues, especially progress and proposals in standardization.\n");
	out.print  ("<li> <a href=\"http://www.terena.nl/library/multiling/euroml/JWvanWingen.html\">Johan W. van Wingen</a>:\n");
	out.print  ("     <a href=\"http://www.terena.nl/library/multiling/euroml/mlcs5.html\"\n");
	out.print  ("     ><cite>Character sets. Letters, tokens and codes.</cite></a>\n");
	out.print  ("Detailed information on many topics (including particular character codes).\n");
	out.print  ("<!--\n");
	out.print  ("now: http://web.archive.org/web/20040107013243/http://wes.feec.vutbr.cz/~kuchta/cp/esej.html\n");
	out.print  ("<li> <a href=\"http://www.fee.vutbr.cz/~kuchta/\">Jiri Kuchta</a>:\n");
	out.print  ("<cite><a href=\n");
	out.print  ("\"http://www.fee.vutbr.cz/~kuchta/cp/esej.html.iso-8859-1\"\n");
	out.print  (">Survey of code page history</a></cite>.\n");
	out.print  ("-->\n");
	out.print  ("<li> Steven J. Searle: <cite><a href=\n");
	out.print  ("\"http://tronweb.super-nova.co.jp/characcodehist.html\"\n");
	out.print  (">A Brief History of Character Codes\n");
	out.print  ("     in\n");
	out.print  ("     North America, Europe, and East Asia</a></cite>\n");
	out.print  ("<li> <a href=\"http://www.oreilly.com/catalog/cjkvinfo/author.html\">\n");
	out.print  ("     Ken Lunde</a>:\n");
	out.print  ("     <cite><a href=\"http://www.oreilly.com/catalog/cjkvinfo/\"\n");
	out.print  ("     >CJKV Information Processing</a></cite>. A book on\n");
	out.print  ("     Chinese, Japanese, Korean&nbsp;&amp; Vietnamese Computing.\n");
	out.print  ("     The book itself is not online, but some extracts are, e.g. the\n");
	out.print  ("<a href=\"ftp://ftp.oreilly.com/pub/examples/nutshell/cjkv/pdf/CJKVInfoProc.Chap1.pdf\"\n");
	out.print  ("title=\"CJKV Information Processing Overview [PDF format]\">overview</a> chapter.\n");
	out.print  ("<li> <a href=\"http://www.eki.ee/letter/\">An online character database</a> by\n");
	out.print  ("Indrek Hein at the\n");
	out.print  ("<a href=\"http://www.eki.ee/\">Institute of the Estonian Language</a>.\n");
	out.print  ("You can e.g. search for\n");
	out.print  ("     <a href=\"#10646\">Unicode</a> characters by name or code position,\n");
	out.print  ("     get lists of differences between some character sets,\n");
	out.print  ("     and get lists of characters needed for different languages.\n");
	out.print  ("<li> <a href=\n");
	out.print  ("\"http://recode.progiciels-bpi.ca/\"\n");
	out.print  ("     ><cite>Free recode</cite></a> is a free program\n");
	out.print  ("by <a href=\"http://www.iro.umontreal.ca/%7epinard/index.html\" hreflang=\"fr\"\n");
	out.print  ("lang=\"fr\"\n");
	out.print  ("    >François Pinard</a>.\n");
	out.print  ("It can be used to perform various character code conversions between a large number\n");
	out.print  ("of encodings.\n");
	out.print  ("</UL>  \n");
	out.print  ("\n");
	out.print  ("<p>Character code problems are part of a topic called\n");
	out.print  ("<dfn>internationalization</dfn> (jocularly abbreviated as\n");
	out.print  ("<dfn>i18n</dfn>), rather misleadingly, because it mainly revolves\n");
	out.print  ("around the problems of using various <em>languages and\n");
	out.print  ("writing systems (scripts)</em>. (Typically\n");
	out.print  ("<a href=\"lingua-franca.html\"\n");
	out.print  ("title=\"English - the universal language on the Internet?\"\n");
	out.print  ("><em>international</em> communication on the Internet is\n");
	out.print  ("carried out in English</a>!)\n");
	out.print  ("It includes difficult questions like text directionality\n");
	out.print  ("(some languages are written right to left) and requirements to\n");
	out.print  ("present the same character with different glyphs according to\n");
	out.print  ("its context.\n");
	out.print  ("See\n");
	out.print  ("<a href=\"http://www.w3.org/International/\">W3C pages on\n");
	out.print  ("internationalization</a>.</p>\n");
	out.print  ("\n");
	out.print  ("<P>I originally started writing this document as a tutorial\n");
	out.print  ("for <strong>HTML authors</strong>.\n");
	out.print  ("Later I noticed that this general information\n");
	out.print  ("is extensive enough to be put into a document of its own.\n");
	out.print  ("As regards to HTML specific problems, the document\n");
	out.print  ("<a href=\"html/chars.html\"><cite>Using national and special\n");
	out.print  ("characters in HTML</cite></a>\n");
	out.print  ("summarizes what currently seems to be the best alternative\n");
	out.print  ("in the general case.\n");
	out.print  ("</P>\n");
	out.print  ("\n");
	out.print  ("<hr title=\"Information about this document\">\n");
	out.print  ("<h2 style=\"font-size:100%;margin:0\">Acknowledgements</h2>\n");
	out.print  ("<p>I have learned a lot about character set issues from\n");
	out.print  ("the following people (listed in an order which is\n");
	out.print  ("roughly chronological \n");
	out.print  ("by the start of their influence on my understanding of these things): \n");
	out.print  ("<A HREFLANG=\"fi\" LANG=\"fi\"\n");
	out.print  (" HREF=\"http://www.nixu.fi/%7ekiravuo/\">Timo Kiravuo</A>,\n");
	out.print  ("<A HREF=\"http://www.alanflavell.org.uk/\">Alan&nbsp;J.&nbsp;Flavell</a>,\n");
	out.print  ("<A HREF=\"mailto:aray@interactrx.com\">Arjun Ray</A>,\n");
	out.print  ("Roman Czyborra,\n");
	out.print  ("<a href=\"http://web.archive.org/web/20041012084602/http://www.bobbemer.com/\">Bob Bemer</a>,\n");
	out.print  ("<!--\n");
	out.print  ("\"http://www.tieke.fi/tieke.nsf/DUID/5327E5F9C8E43491C22569D00045F9C4?OpenDocument\"-->\n");
	out.print  ("<span lang=\"fi\"\n");
	out.print  (">Erkki&nbsp;I. Kolehmainen</span>.\n");
	out.print  ("(But any errors in this document I souped up\n");
	out.print  ("by myself.)</P>\n");
	out.print  ("\n");
	out.print  ("<div class=\"footer\">\n");
	out.print  ("<div><A TITLE=\n");
	out.print  ("\"ISO 8601, the date and time representation standard\"\n");
	out.print  ("HREF=\"iso8601.html\">\n");
	out.print  ("Date</A> of last revision: 2001-09-06.\n");
	out.print  ("Date of last update: \n");
	out.print  ("2012-02-27.\n");
	out.print  ("Minor modifications 2017-10-21.\n");
	out.print  ("Link fixes 2018-10-14 and 2018-10-16.\n");
	out.print  ("</div>\n");
	out.print  ("\n");
	out.print  ("<div>This page belongs\n");
	out.print  ("to section\n");
	out.print  ("<cite><a href=\"chars/index.html\">Characters and encodings</a></cite> of\n");
	out.print  ("the free information site\n");
	out.print  ("<cite><a accesskey=\"1\" href=\"indexen.html\">IT and communication</a></cite>\n");
	out.print  ("by\n");
	out.print  ("<a href=\"personal.html\" title=\n");
	out.print  ("\"Jukka K. Korpela, an IT generalist and specialist (personal home page)\"\n");
	out.print  ("><span lang=\"fi\">Jukka</span> \"Yucca\" <span lang=\"fi\">Korpela</span></a>.\n");
	out.print  ("</div>\n");
	out.print  ("</div>\n");
	out.print  ("</body>\n");
	out.print  ("</html>\n");

	} //~render end
}
