import java.util.Hashtable;
/**
* Collection of static methods to convert special and extended
* characters into HTML entities and vice versa.
* Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com) Via Ugo Foscolo
* n.19 - 09045 Quartu Sant'Elena (CA) - ITALY - www.tecnick.com -
* info@tecnick.com
*
* Project homepage: http://htmlentities.sourceforge.net
* License: http://www.gnu.org/copyleft/lesser.html LGPL
* @author Nicola Asuni [www.tecnick.com].
* @version 1.0.004
*/
public class HTMLEntities {

    /**
     * Translation table for HTML entities: each row is
     * <code>{ String entityReference, Integer codePoint }</code>.
     * The less-than and greater-than entities are deliberately absent so that
     * {@link #htmlentities(String)} leaves markup intact; use
     * {@link #htmlAngleBrackets(String)} for those.
     *
     * reference: W3C - Character entity references in HTML 4
     * [http://www.w3.org/TR/html401/sgml/entities.html].
     */
    private static final Object[][] html_entities_table = {
        { "&Aacute;", Integer.valueOf(193) },
        { "&aacute;", Integer.valueOf(225) },
        { "&Acirc;", Integer.valueOf(194) },
        { "&acirc;", Integer.valueOf(226) },
        { "&acute;", Integer.valueOf(180) },
        { "&AElig;", Integer.valueOf(198) },
        { "&aelig;", Integer.valueOf(230) },
        { "&Agrave;", Integer.valueOf(192) },
        { "&agrave;", Integer.valueOf(224) },
        { "&alefsym;", Integer.valueOf(8501) },
        { "&Alpha;", Integer.valueOf(913) },
        { "&alpha;", Integer.valueOf(945) },
        { "&amp;", Integer.valueOf(38) },
        { "&and;", Integer.valueOf(8743) },
        { "&ang;", Integer.valueOf(8736) },
        { "&Aring;", Integer.valueOf(197) },
        { "&aring;", Integer.valueOf(229) },
        { "&asymp;", Integer.valueOf(8776) },
        { "&Atilde;", Integer.valueOf(195) },
        { "&atilde;", Integer.valueOf(227) },
        { "&Auml;", Integer.valueOf(196) },
        { "&auml;", Integer.valueOf(228) },
        { "&bdquo;", Integer.valueOf(8222) },
        { "&Beta;", Integer.valueOf(914) },
        { "&beta;", Integer.valueOf(946) },
        { "&brvbar;", Integer.valueOf(166) },
        { "&bull;", Integer.valueOf(8226) },
        { "&cap;", Integer.valueOf(8745) },
        { "&Ccedil;", Integer.valueOf(199) },
        { "&ccedil;", Integer.valueOf(231) },
        { "&cedil;", Integer.valueOf(184) },
        { "&cent;", Integer.valueOf(162) },
        { "&Chi;", Integer.valueOf(935) },
        { "&chi;", Integer.valueOf(967) },
        { "&circ;", Integer.valueOf(710) },
        { "&clubs;", Integer.valueOf(9827) },
        { "&cong;", Integer.valueOf(8773) },
        { "&copy;", Integer.valueOf(169) },
        { "&crarr;", Integer.valueOf(8629) },
        { "&cup;", Integer.valueOf(8746) },
        { "&curren;", Integer.valueOf(164) },
        { "&dagger;", Integer.valueOf(8224) },
        { "&Dagger;", Integer.valueOf(8225) },
        { "&darr;", Integer.valueOf(8595) },
        { "&dArr;", Integer.valueOf(8659) },
        { "&deg;", Integer.valueOf(176) },
        { "&Delta;", Integer.valueOf(916) },
        { "&delta;", Integer.valueOf(948) },
        { "&diams;", Integer.valueOf(9830) },
        { "&divide;", Integer.valueOf(247) },
        { "&Eacute;", Integer.valueOf(201) },
        { "&eacute;", Integer.valueOf(233) },
        { "&Ecirc;", Integer.valueOf(202) },
        { "&ecirc;", Integer.valueOf(234) },
        { "&Egrave;", Integer.valueOf(200) },
        { "&egrave;", Integer.valueOf(232) },
        { "&empty;", Integer.valueOf(8709) },
        { "&emsp;", Integer.valueOf(8195) },
        { "&ensp;", Integer.valueOf(8194) },
        { "&Epsilon;", Integer.valueOf(917) },
        { "&epsilon;", Integer.valueOf(949) },
        { "&equiv;", Integer.valueOf(8801) },
        { "&Eta;", Integer.valueOf(919) },
        { "&eta;", Integer.valueOf(951) },
        { "&ETH;", Integer.valueOf(208) },
        { "&eth;", Integer.valueOf(240) },
        { "&Euml;", Integer.valueOf(203) },
        { "&euml;", Integer.valueOf(235) },
        { "&euro;", Integer.valueOf(8364) },
        { "&exist;", Integer.valueOf(8707) },
        { "&fnof;", Integer.valueOf(402) },
        { "&forall;", Integer.valueOf(8704) },
        { "&frac12;", Integer.valueOf(189) },
        { "&frac14;", Integer.valueOf(188) },
        { "&frac34;", Integer.valueOf(190) },
        { "&frasl;", Integer.valueOf(8260) },
        { "&Gamma;", Integer.valueOf(915) },
        { "&gamma;", Integer.valueOf(947) },
        { "&ge;", Integer.valueOf(8805) },
        { "&harr;", Integer.valueOf(8596) },
        { "&hArr;", Integer.valueOf(8660) },
        { "&hearts;", Integer.valueOf(9829) },
        { "&hellip;", Integer.valueOf(8230) },
        { "&Iacute;", Integer.valueOf(205) },
        { "&iacute;", Integer.valueOf(237) },
        { "&Icirc;", Integer.valueOf(206) },
        { "&icirc;", Integer.valueOf(238) },
        { "&iexcl;", Integer.valueOf(161) },
        { "&Igrave;", Integer.valueOf(204) },
        { "&igrave;", Integer.valueOf(236) },
        { "&image;", Integer.valueOf(8465) },
        { "&infin;", Integer.valueOf(8734) },
        { "&int;", Integer.valueOf(8747) },
        { "&Iota;", Integer.valueOf(921) },
        { "&iota;", Integer.valueOf(953) },
        { "&iquest;", Integer.valueOf(191) },
        { "&isin;", Integer.valueOf(8712) },
        { "&Iuml;", Integer.valueOf(207) },
        { "&iuml;", Integer.valueOf(239) },
        { "&Kappa;", Integer.valueOf(922) },
        { "&kappa;", Integer.valueOf(954) },
        { "&Lambda;", Integer.valueOf(923) },
        { "&lambda;", Integer.valueOf(955) },
        { "&lang;", Integer.valueOf(9001) },
        { "&laquo;", Integer.valueOf(171) },
        { "&larr;", Integer.valueOf(8592) },
        { "&lArr;", Integer.valueOf(8656) },
        { "&lceil;", Integer.valueOf(8968) },
        { "&ldquo;", Integer.valueOf(8220) },
        { "&le;", Integer.valueOf(8804) },
        { "&lfloor;", Integer.valueOf(8970) },
        { "&lowast;", Integer.valueOf(8727) },
        { "&loz;", Integer.valueOf(9674) },
        { "&lrm;", Integer.valueOf(8206) },
        { "&lsaquo;", Integer.valueOf(8249) },
        { "&lsquo;", Integer.valueOf(8216) },
        { "&macr;", Integer.valueOf(175) },
        { "&mdash;", Integer.valueOf(8212) },
        { "&micro;", Integer.valueOf(181) },
        { "&middot;", Integer.valueOf(183) },
        { "&minus;", Integer.valueOf(8722) },
        { "&Mu;", Integer.valueOf(924) },
        { "&mu;", Integer.valueOf(956) },
        { "&nabla;", Integer.valueOf(8711) },
        { "&nbsp;", Integer.valueOf(160) },
        { "&ndash;", Integer.valueOf(8211) },
        { "&ne;", Integer.valueOf(8800) },
        { "&ni;", Integer.valueOf(8715) },
        { "&not;", Integer.valueOf(172) },
        { "&notin;", Integer.valueOf(8713) },
        { "&nsub;", Integer.valueOf(8836) },
        { "&Ntilde;", Integer.valueOf(209) },
        { "&ntilde;", Integer.valueOf(241) },
        { "&Nu;", Integer.valueOf(925) },
        { "&nu;", Integer.valueOf(957) },
        { "&Oacute;", Integer.valueOf(211) },
        { "&oacute;", Integer.valueOf(243) },
        { "&Ocirc;", Integer.valueOf(212) },
        { "&ocirc;", Integer.valueOf(244) },
        { "&OElig;", Integer.valueOf(338) },
        { "&oelig;", Integer.valueOf(339) },
        { "&Ograve;", Integer.valueOf(210) },
        { "&ograve;", Integer.valueOf(242) },
        { "&oline;", Integer.valueOf(8254) },
        { "&Omega;", Integer.valueOf(937) },
        { "&omega;", Integer.valueOf(969) },
        { "&Omicron;", Integer.valueOf(927) },
        { "&omicron;", Integer.valueOf(959) },
        { "&oplus;", Integer.valueOf(8853) },
        { "&or;", Integer.valueOf(8744) },
        { "&ordf;", Integer.valueOf(170) },
        { "&ordm;", Integer.valueOf(186) },
        { "&Oslash;", Integer.valueOf(216) },
        { "&oslash;", Integer.valueOf(248) },
        { "&Otilde;", Integer.valueOf(213) },
        { "&otilde;", Integer.valueOf(245) },
        { "&otimes;", Integer.valueOf(8855) },
        { "&Ouml;", Integer.valueOf(214) },
        { "&ouml;", Integer.valueOf(246) },
        { "&para;", Integer.valueOf(182) },
        { "&part;", Integer.valueOf(8706) },
        { "&permil;", Integer.valueOf(8240) },
        { "&perp;", Integer.valueOf(8869) },
        { "&Phi;", Integer.valueOf(934) },
        { "&phi;", Integer.valueOf(966) },
        { "&Pi;", Integer.valueOf(928) },
        { "&pi;", Integer.valueOf(960) },
        { "&piv;", Integer.valueOf(982) },
        { "&plusmn;", Integer.valueOf(177) },
        { "&pound;", Integer.valueOf(163) },
        { "&prime;", Integer.valueOf(8242) },
        { "&Prime;", Integer.valueOf(8243) },
        { "&prod;", Integer.valueOf(8719) },
        { "&prop;", Integer.valueOf(8733) },
        { "&Psi;", Integer.valueOf(936) },
        { "&psi;", Integer.valueOf(968) },
        { "&quot;", Integer.valueOf(34) },
        { "&radic;", Integer.valueOf(8730) },
        { "&rang;", Integer.valueOf(9002) },
        { "&raquo;", Integer.valueOf(187) },
        { "&rarr;", Integer.valueOf(8594) },
        { "&rArr;", Integer.valueOf(8658) },
        { "&rceil;", Integer.valueOf(8969) },
        { "&rdquo;", Integer.valueOf(8221) },
        { "&real;", Integer.valueOf(8476) },
        { "&reg;", Integer.valueOf(174) },
        { "&rfloor;", Integer.valueOf(8971) },
        { "&Rho;", Integer.valueOf(929) },
        { "&rho;", Integer.valueOf(961) },
        { "&rlm;", Integer.valueOf(8207) },
        { "&rsaquo;", Integer.valueOf(8250) },
        { "&rsquo;", Integer.valueOf(8217) },
        { "&sbquo;", Integer.valueOf(8218) },
        { "&Scaron;", Integer.valueOf(352) },
        { "&scaron;", Integer.valueOf(353) },
        { "&sdot;", Integer.valueOf(8901) },
        { "&sect;", Integer.valueOf(167) },
        { "&shy;", Integer.valueOf(173) },
        { "&Sigma;", Integer.valueOf(931) },
        { "&sigma;", Integer.valueOf(963) },
        { "&sigmaf;", Integer.valueOf(962) },
        { "&sim;", Integer.valueOf(8764) },
        { "&spades;", Integer.valueOf(9824) },
        { "&sub;", Integer.valueOf(8834) },
        { "&sube;", Integer.valueOf(8838) },
        { "&sum;", Integer.valueOf(8721) },
        { "&sup1;", Integer.valueOf(185) },
        { "&sup2;", Integer.valueOf(178) },
        { "&sup3;", Integer.valueOf(179) },
        { "&sup;", Integer.valueOf(8835) },
        { "&supe;", Integer.valueOf(8839) },
        { "&szlig;", Integer.valueOf(223) },
        { "&Tau;", Integer.valueOf(932) },
        { "&tau;", Integer.valueOf(964) },
        { "&there4;", Integer.valueOf(8756) },
        { "&Theta;", Integer.valueOf(920) },
        { "&theta;", Integer.valueOf(952) },
        { "&thetasym;", Integer.valueOf(977) },
        { "&thinsp;", Integer.valueOf(8201) },
        { "&THORN;", Integer.valueOf(222) },
        { "&thorn;", Integer.valueOf(254) },
        { "&tilde;", Integer.valueOf(732) },
        { "&times;", Integer.valueOf(215) },
        { "&trade;", Integer.valueOf(8482) },
        { "&Uacute;", Integer.valueOf(218) },
        { "&uacute;", Integer.valueOf(250) },
        { "&uarr;", Integer.valueOf(8593) },
        { "&uArr;", Integer.valueOf(8657) },
        { "&Ucirc;", Integer.valueOf(219) },
        { "&ucirc;", Integer.valueOf(251) },
        { "&Ugrave;", Integer.valueOf(217) },
        { "&ugrave;", Integer.valueOf(249) },
        { "&uml;", Integer.valueOf(168) },
        { "&upsih;", Integer.valueOf(978) },
        { "&Upsilon;", Integer.valueOf(933) },
        { "&upsilon;", Integer.valueOf(965) },
        { "&Uuml;", Integer.valueOf(220) },
        { "&uuml;", Integer.valueOf(252) },
        { "&weierp;", Integer.valueOf(8472) },
        { "&Xi;", Integer.valueOf(926) },
        { "&xi;", Integer.valueOf(958) },
        { "&Yacute;", Integer.valueOf(221) },
        { "&yacute;", Integer.valueOf(253) },
        { "&yen;", Integer.valueOf(165) },
        { "&yuml;", Integer.valueOf(255) },
        { "&Yuml;", Integer.valueOf(376) },
        { "&Zeta;", Integer.valueOf(918) },
        { "&zeta;", Integer.valueOf(950) },
        { "&zwj;", Integer.valueOf(8205) },
        { "&zwnj;", Integer.valueOf(8204) } };

    /**
     * Maximum length, including the leading ampersand and the trailing
     * semicolon, of a candidate entity reference. The longest named entity in
     * the table is 10 characters; the extra slack admits numeric references
     * with a couple of leading zeros.
     */
    private static final int MAX_ENTITY_LENGTH = 12;

    /**
     * Map to convert extended characters into HTML entities
     * (code point to entity reference).
     */
    private static final Hashtable<Integer, String> htmlentities_map =
            new Hashtable<Integer, String>();

    /**
     * Map to convert HTML entities into extended characters
     * (entity reference to code point).
     */
    private static final Hashtable<String, Integer> unhtmlentities_map =
            new Hashtable<String, Integer>();

    // Fill both maps once at class-initialisation time. The previous lazy
    // "isEmpty()" check allowed a second thread to observe a half-filled map.
    static {
        initializeEntitiesTables();
    }

    //==============================================================================
    // METHODS
    //==============================================================================

    /**
     * Creates an instance. All functionality is static and the translation
     * maps are populated when the class is loaded, so this constructor does
     * nothing; it is retained only so existing callers keep compiling.
     */
    public HTMLEntities() {
        // nothing to do: see the static initialiser
    }

    /**
     * Populates the two lookup maps from the translation table.
     */
    private static void initializeEntitiesTables() {
        for (int i = 0; i < html_entities_table.length; ++i) {
            String entity = (String) html_entities_table[i][0];
            Integer codePoint = (Integer) html_entities_table[i][1];
            htmlentities_map.put(codePoint, entity);
            unhtmlentities_map.put(entity, codePoint);
        }
    }

    /**
     * Get the HTML entities translation table.
     *
     * @return a copy of the translation table; each row holds the entity
     *         reference (String) followed by its code point (Integer).
     *         Modifying the returned arrays does not affect this class.
     */
    public static Object[][] getEntitiesTable() {
        Object[][] copy = new Object[html_entities_table.length][];
        for (int i = 0; i < html_entities_table.length; ++i) {
            copy[i] = new Object[] {
                html_entities_table[i][0], html_entities_table[i][1] };
        }
        return copy;
    }

    /**
     * Convert special and extended characters into HTML entities.
     * Characters present in the translation table become named references;
     * any other non-ASCII character becomes a decimal numeric reference.
     * A surrogate pair is emitted as a single reference to its code point.
     * Existing references in the input are not recognised, so their
     * ampersand is escaped again.
     *
     * @param str input string, may be {@code null}
     * @return formatted string; the empty string when {@code str} is null
     * @see #unhtmlentities(String)
     */
    public static String htmlentities(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder buf = new StringBuilder(str.length() + 16);
        int i = 0;
        while (i < str.length()) {
            // Work on code points so supplementary characters stay in one piece.
            int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);
            String entity = htmlentities_map.get(Integer.valueOf(codePoint));
            if (entity != null) {
                buf.append(entity);
            } else if (codePoint > 127) {
                // everything outside 7-bit ASCII is written numerically
                buf.append("&#").append(codePoint).append(';');
            } else {
                buf.append((char) codePoint);
            }
        }
        return buf.toString();
    }

    /**
     * Convert HTML entities to their special and extended unicode character
     * equivalents. Named references from the translation table and decimal
     * or hexadecimal numeric references are decoded. Anything that is not a
     * well-formed, known reference is copied through unchanged.
     *
     * @param str input string, may be {@code null}
     * @return formatted string; the empty string when {@code str} is null
     * @see #htmlentities(String)
     */
    public static String unhtmlentities(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder buf = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); ++i) {
            char ch = str.charAt(i);
            if (ch == '&') {
                int semi = findEntityEnd(str, i);
                if (semi != -1) {
                    int codePoint = decodeEntity(str.substring(i, semi + 1));
                    if (codePoint >= 0) {
                        buf.append(Character.toChars(codePoint));
                        i = semi;
                        continue;
                    }
                }
                // Not a reference: fall through and emit only the ampersand so
                // that a genuine reference following it is still decoded.
            }
            buf.append(ch);
        }
        return buf.toString();
    }

    /**
     * Finds the semicolon closing a candidate reference that starts at
     * {@code start}, looking no further than {@link #MAX_ENTITY_LENGTH}.
     *
     * @param str the text being scanned
     * @param start index of the ampersand
     * @return index of the semicolon, or -1 when the text is not shaped like
     *         a reference
     */
    private static int findEntityEnd(String str, int start) {
        int limit = Math.min(str.length(), start + MAX_ENTITY_LENGTH);
        for (int k = start + 1; k < limit; ++k) {
            char c = str.charAt(k);
            if (c == ';') {
                return k;
            }
            if (c != '#' && !Character.isLetterOrDigit(c)) {
                return -1; // whitespace, another ampersand, punctuation...
            }
        }
        return -1;
    }

    /**
     * Resolves one complete reference (ampersand through semicolon).
     *
     * @param entity the reference text
     * @return the code point it denotes, or -1 when it is unknown or malformed
     */
    private static int decodeEntity(String entity) {
        if (entity.length() > 3 && entity.charAt(1) == '#') {
            char marker = entity.charAt(2);
            if (marker == 'x' || marker == 'X') {
                return parseCodePoint(entity, 3, 16);
            }
            return parseCodePoint(entity, 2, 10);
        }
        Integer codePoint = unhtmlentities_map.get(entity);
        return (codePoint == null) ? -1 : codePoint.intValue();
    }

    /**
     * Parses the digits of a numeric reference.
     *
     * @param entity the reference text, ending with a semicolon
     * @param from index of the first digit
     * @param radix 10 or 16
     * @return the code point, or -1 when there are no digits, a character is
     *         not a digit in {@code radix}, or the value exceeds the Unicode
     *         range
     */
    private static int parseCodePoint(String entity, int from, int radix) {
        int end = entity.length() - 1; // index of the closing semicolon
        if (from >= end) {
            return -1;
        }
        int value = 0;
        for (int k = from; k < end; ++k) {
            int digit = Character.digit(entity.charAt(k), radix);
            if (digit < 0) {
                return -1;
            }
            value = value * radix + digit;
            if (value > Character.MAX_CODE_POINT) {
                return -1; // also keeps the accumulator from overflowing
            }
        }
        return value;
    }

    // methods to convert special characters

    /**
     * Replace single quote characters, and the numeric references
     * {@code &#039;}, {@code &#145;} and {@code &#146;}, with the
     * {@code &rsquo;} entity.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced single quotes; empty string for null input
     */
    public static String htmlSingleQuotes(String str) {
        if (str == null) {
            return "";
        }
        str = str.replaceAll("'", "&rsquo;");
        str = str.replaceAll("&#039;", "&rsquo;");
        str = str.replaceAll("&#145;", "&rsquo;");
        str = str.replaceAll("&#146;", "&rsquo;");
        return str;
    }

    /**
     * Replace {@code &rsquo;} entities with the single quote character.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced single quotes; empty string for null input
     */
    public static String unhtmlSingleQuotes(String str) {
        if (str == null) {
            return "";
        }
        return str.replaceAll("&rsquo;", "'");
    }

    /**
     * Replace double quote characters, and the numeric references
     * {@code &#147;} and {@code &#148;}, with the {@code &quot;} entity.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced double quotes; empty string for null input
     */
    public static String htmlDoubleQuotes(String str) {
        if (str == null) {
            return "";
        }
        str = str.replaceAll("\"", "&quot;");
        str = str.replaceAll("&#147;", "&quot;");
        str = str.replaceAll("&#148;", "&quot;");
        return str;
    }

    /**
     * Replace {@code &quot;} entities with the double quote character.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced double quotes; empty string for null input
     */
    public static String unhtmlDoubleQuotes(String str) {
        if (str == null) {
            return "";
        }
        return str.replaceAll("&quot;", "\"");
    }

    /**
     * Replace single and double quote characters with HTML entities.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced quotes; empty string for null input
     */
    public static String htmlQuotes(String str) {
        return htmlSingleQuotes(htmlDoubleQuotes(str));
    }

    /**
     * Replace single and double quote HTML entities with the equivalent
     * characters.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced quotes; empty string for null input
     */
    public static String unhtmlQuotes(String str) {
        return unhtmlSingleQuotes(unhtmlDoubleQuotes(str));
    }

    /**
     * Replace the less-than and greater-than characters with the
     * {@code &lt;} and {@code &gt;} entities.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced characters; empty string for null input
     */
    public static String htmlAngleBrackets(String str) {
        if (str == null) {
            return "";
        }
        str = str.replaceAll("<", "&lt;");
        str = str.replaceAll(">", "&gt;");
        return str;
    }

    /**
     * Replace {@code &lt;} and {@code &gt;} entities with the less-than and
     * greater-than characters.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced entities; empty string for null input
     */
    public static String unhtmlAngleBrackets(String str) {
        if (str == null) {
            return "";
        }
        str = str.replaceAll("&lt;", "<");
        str = str.replaceAll("&gt;", ">");
        return str;
    }

    /**
     * Replace ampersand characters with the {@code &amp;} entity. Every
     * ampersand is replaced, including those that already start a reference.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced characters; empty string for null input
     */
    public static String htmlAmpersand(String str) {
        if (str == null) {
            return "";
        }
        return str.replaceAll("&", "&amp;");
    }

    /**
     * Replace {@code &amp;} entities with ampersand characters.
     *
     * @param str the input string, may be {@code null}
     * @return string with replaced entities; empty string for null input
     */
    public static String unhtmlAmpersand(String str) {
        if (str == null) {
            return "";
        }
        return str.replaceAll("&amp;", "&");
    }
}