Description: In cases where extended characters are not recognized by the
browser, we need to use their HTML equivalent codes.
We can convert extended characters to their HTML equivalent codes using the
ConvertExtendedCharactersToHtmlCode() method shown below, so that they do not
appear as junk characters in the browser.
The extended ASCII codes (character code 128-255)
Examples:
| DEC | OCT | HEX | BIN | Symbol | HTML Number | HTML Name | Description |
|-----|-----|-----|----------|--------|-------------|-----------|-------------|
| 128 | 200 | 80 | 10000000 | € | `&#128;` | `&euro;` | Euro sign |
| 129 | 201 | 81 | 10000001 | | | | |
| 130 | 202 | 82 | 10000010 | ‚ | `&#130;` | `&sbquo;` | Single low-9 quotation mark |
| 131 | 203 | 83 | 10000011 | ƒ | `&#131;` | `&fnof;` | Latin small letter f with hook |
| 132 | 204 | 84 | 10000100 | „ | `&#132;` | `&bdquo;` | Double low-9 quotation mark |
| 133 | 205 | 85 | 10000101 | … | `&#133;` | `&hellip;` | Horizontal ellipsis |
| 134 | 206 | 86 | 10000110 | † | `&#134;` | `&dagger;` | Dagger |
| 135 | 207 | 87 | 10000111 | ‡ | `&#135;` | `&Dagger;` | Double dagger |
The complete list is available at http://www.ascii-code.com/
·
Methods:
ConvertExtendedCharactersToHtmlCode()
/// <summary>
/// Rewrites the document's markup so that extended (non-ASCII) characters are
/// expressed as decimal HTML numeric character references (<c>&amp;#NNN;</c>),
/// then reloads the document from the rewritten markup.
/// </summary>
/// <param name="doc">
/// The document to convert. It is modified in place via <c>LoadHtml</c>.
/// </param>
/// <returns>
/// The same <paramref name="doc"/> instance. If <paramref name="doc"/> is null,
/// or the conversion fails, the argument is returned as it was received.
/// </returns>
/// <remarks>
/// Conversion rules, applied to every character of <c>DocumentNode.OuterHtml</c>:
/// characters below 128 are kept; en dash, em dash, u-grave and u-acute are kept
/// (the original filter deliberately excluded them from conversion); six
/// private-use code points are replaced by a single space; every other character
/// is replaced by its decimal numeric character reference.
/// </remarks>
public static HtmlDocument ConvertExtendedCharactersToHtmlCode(HtmlDocument doc)
{
    // A null document used to surface as a swallowed NullReferenceException and
    // the argument was returned untouched; keep that contract without the exception.
    if (doc == null || doc.DocumentNode == null)
    {
        return doc;
    }

    try
    {
        string html = doc.DocumentNode.OuterHtml;
        doc.LoadHtml(EncodeExtendedCharacters(html));
    }
    catch (Exception ex)
    {
        // The conversion is best-effort: callers have always received the
        // document back instead of an exception. Record the failure rather
        // than discarding it silently.
        System.Diagnostics.Trace.TraceError(
            "ConvertExtendedCharactersToHtmlCode failed: " + ex);
    }

    return doc;
}

/// <summary>
/// Single-pass encoder that applies the conversion rules to a markup string.
/// </summary>
private static string EncodeExtendedCharacters(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    StringBuilder encoded = new StringBuilder(html.Length + 16);

    for (int i = 0; i < html.Length; i++)
    {
        char current = html[i];

        if (current < 128 || IsLeftUnencoded(current))
        {
            encoded.Append(current);
        }
        else if (IsBlankedPrivateUseCharacter(current))
        {
            encoded.Append(' ');
        }
        else if (char.IsHighSurrogate(current)
                 && i + 1 < html.Length
                 && char.IsLowSurrogate(html[i + 1]))
        {
            // A surrogate pair is one character: emit one reference for the
            // full code point. Referencing each half separately is invalid HTML.
            AppendNumericReference(encoded, char.ConvertToUtf32(current, html[i + 1]));
            i++; // the low surrogate has been consumed
        }
        else
        {
            AppendNumericReference(encoded, current);
        }
    }

    return encoded.ToString();
}

/// <summary>
/// Non-ASCII characters that the original filter excluded from conversion.
/// </summary>
private static bool IsLeftUnencoded(char value)
{
    switch (value)
    {
        case '\u2013': // en dash
        case '\u2014': // em dash
        case '\u00F9': // u with grave
        case '\u00FA': // u with acute
            return true;
        default:
            return false;
    }
}

/// <summary>
/// Private-use code points that are replaced by a space instead of being encoded.
/// </summary>
private static bool IsBlankedPrivateUseCharacter(char value)
{
    switch ((int)value)
    {
        case 61475:
        case 61478:
        case 61488:
        case 61499:
        case 61537:
        case 61600:
            // 61560 was taken out of this list by an earlier bug fix and is
            // therefore encoded like any other extended character.
            return true;
        default:
            return false;
    }
}

/// <summary>
/// Appends <c>&amp;#codePoint;</c> using culture-independent decimal digits.
/// </summary>
private static void AppendNumericReference(StringBuilder target, int codePoint)
{
    target.Append("&#")
          .Append(codePoint.ToString(System.Globalization.CultureInfo.InvariantCulture))
          .Append(';');
}
No comments:
Post a Comment