#entify.awk: Change Latin-1 high alphabetics to HTML entities for 7-bit safety.
#
# Typical use:
#
#   awk -f entify.awk infile.8bit > outfile.html
#
#   If you have Latin 1 characters in a URL, they should actually be escaped
# using the %-hex-digits convention; the program ignores this consideration.
#
#   This program is written in the ``awk'' programming language (on Sun systems
# and some others, non-archaic ``awk'' is called ``nawk'', so that ``nawk''
# should be used instead of ``awk'').  Also, a freely-redistributable ``awk''
# interpreter called ``gawk'', which is free of the bugs that some of the
# vendor-supplied ``awk''/``nawk'' programs suffer from, is available for most
# platforms, and as source from the FSF GNU project.
#
# Copyright H. Churchyard 1994 -- freely redistributable.
# Version 1.0 11/27/94
# Version 1.1 2/17/95 -- Eliminated warning, may not have been appropriate in
# all circumstances.
#
#This will test the 8-bit-cleanliness of your awk:
BEGIN {
    # Lookup table: one Latin-1 byte (written as an octal escape) -> the
    # replacement text handed to gsub().  Each value begins with a
    # backslash followed by "&" so that gsub() inserts a literal ampersand
    # rather than the matched text.

    # Uppercase letters \300-\337 (\327 has no entry).
    enty["\300"] = "\\&Agrave;"
    enty["\301"] = "\\&Aacute;"
    enty["\302"] = "\\&Acirc;"
    enty["\303"] = "\\&Atilde;"
    enty["\304"] = "\\&Auml;"
    enty["\305"] = "\\&Aring;"
    enty["\306"] = "\\&AElig;"
    enty["\307"] = "\\&Ccedil;"
    enty["\310"] = "\\&Egrave;"
    enty["\311"] = "\\&Eacute;"
    enty["\312"] = "\\&Ecirc;"
    enty["\313"] = "\\&Euml;"
    enty["\314"] = "\\&Igrave;"
    enty["\315"] = "\\&Iacute;"
    enty["\316"] = "\\&Icirc;"
    enty["\317"] = "\\&Iuml;"
    enty["\320"] = "\\&ETH;"
    enty["\321"] = "\\&Ntilde;"
    enty["\322"] = "\\&Ograve;"
    enty["\323"] = "\\&Oacute;"
    enty["\324"] = "\\&Ocirc;"
    enty["\325"] = "\\&Otilde;"
    enty["\326"] = "\\&Ouml;"
    enty["\330"] = "\\&Oslash;"
    enty["\331"] = "\\&Ugrave;"
    enty["\332"] = "\\&Uacute;"
    enty["\333"] = "\\&Ucirc;"
    enty["\334"] = "\\&Uuml;"
    enty["\335"] = "\\&Yacute;"
    enty["\336"] = "\\&THORN;"
    enty["\337"] = "\\&szlig;"

    # Lowercase letters \340-\377 (\367 has no entry).
    enty["\340"] = "\\&agrave;"
    enty["\341"] = "\\&aacute;"
    enty["\342"] = "\\&acirc;"
    enty["\343"] = "\\&atilde;"
    enty["\344"] = "\\&auml;"
    enty["\345"] = "\\&aring;"
    enty["\346"] = "\\&aelig;"
    enty["\347"] = "\\&ccedil;"
    enty["\350"] = "\\&egrave;"
    enty["\351"] = "\\&eacute;"
    enty["\352"] = "\\&ecirc;"
    enty["\353"] = "\\&euml;"
    enty["\354"] = "\\&igrave;"
    enty["\355"] = "\\&iacute;"
    enty["\356"] = "\\&icirc;"
    enty["\357"] = "\\&iuml;"
    enty["\360"] = "\\&eth;"
    enty["\361"] = "\\&ntilde;"
    enty["\362"] = "\\&ograve;"
    enty["\363"] = "\\&oacute;"
    enty["\364"] = "\\&ocirc;"
    enty["\365"] = "\\&otilde;"
    enty["\366"] = "\\&ouml;"
    enty["\370"] = "\\&oslash;"
    enty["\371"] = "\\&ugrave;"
    enty["\372"] = "\\&uacute;"
    enty["\373"] = "\\&ucirc;"
    enty["\374"] = "\\&uuml;"
    enty["\375"] = "\\&yacute;"
    enty["\376"] = "\\&thorn;"
    enty["\377"] = "\\&yuml;"

    # Two symbols from below the \300 range.
    enty["\251"] = "\\&copy;"
    enty["\256"] = "\\&reg;"
}
#
# Main
#
{
    # Fast path: lines with no byte that the enty table can translate are
    # printed untouched.  The bracket expression lists \251 and \256
    # explicitly because the table maps those two bytes as well, and they
    # lie outside the \300-\377 range; without them a line whose only
    # 8-bit characters are those two would never be converted.
    if ($0 ~ /[\251\256\300-\377]/) {
        # Each key of enty is a single byte used as a dynamic regex; gsub()
        # leaves $0 unchanged when the byte is absent, so no separate
        # match test is needed before calling it.
        for (x in enty)
            gsub(x, enty[x])
    }
    print
}
##EOF
