XML interfaces fixes

- 4220: gracefully convert any unicode chars to ISO-8859-1 instead of
  truncating at unknown chars
- 4171: GC waypoint = charset conversion error
- correct indentation of attribute lines
This commit is contained in:
Peter Mandrella
2012-07-23 01:43:19 +02:00
committed by following
parent 68718da6b5
commit 1fe16390fe
4 changed files with 93 additions and 11 deletions

View File

@ -0,0 +1,82 @@
<?php
/****************************************************************************
./lib/charset.inc.php
--------------------
begin : July 7, 2012
For license information see doc/license.txt
****************************************************************************/
/****************************************************************************
Unicode Reminder メモ
charset related functions
****************************************************************************/
// Replacement table for Unicode 0x100 to 0x1FF.
//
// Two bytes per code point, indexed by 2 * (code - 0x100). The first byte is
// always emitted; the second byte is emitted too unless it is a space, which
// allows two-letter replacements such as "IJ" or "OE".
//
// Non-ASCII entries are written as hex escapes (\xD6 = O-umlaut etc. in
// ISO-8859-1) so that every entry stays exactly two bytes wide no matter
// which encoding this source file is saved in.
//
// 0x180 to 0x1FF have no dedicated replacement and are filled with "?" so
// that the table really covers the whole documented range.
$utf_xlatin =
    // 0x100 - 0x11F
    "A a A a A a C c C c C c C c D d D d E e E e E e E e E e G g G g " .
    // 0x120 - 0x13F
    "G g G g H h H h I i I i I i I i I i IJijJ j K k K L l L l L l L " .
    // 0x140 - 0x15F
    "l L l N n N n N n n N n O o O o \xD6 \xF6 OEoeR r R r R r S s S s S s " .
    // 0x160 - 0x17F
    "S s T t T t T t U u U u U u U u \xDC \xFC U u W w Y y Y Z z Z z Z z s " .
    // 0x180 - 0x1FF
    str_repeat("? ", 0x80);

// Replacement table for Unicode 0x2000 to 0x203F.
//
// One byte per code point, indexed by (code - 0x2000). The runs of blanks are
// generated with str_repeat() so that the alignment cannot be broken by
// whitespace-collapsing editors or tools.
$utf_punct =
    // 0x2000 - 0x200F: the various space characters, zero-width and direction marks
    str_repeat(" ", 16) .
    // 0x2010 - 0x201F: hyphens and dashes, double bar, low line, quotation marks
    "------|_'','\"\"\"\"" .
    // 0x2020 - 0x2027: daggers, bullets, dot leaders
    "++*>...." .
    // 0x2028 - 0x202F: line/paragraph separators, embedding controls, narrow space
    str_repeat(" ", 8) .
    // 0x2030 - 0x203F: per mille, primes (\xB4 = acute accent), caret, angle quotes, ...
    "%%\xB4\"\"`\"\"^<> !?- ";
// Convert a UTF-8 string to ISO-8859-1 and use replacement characters if possible.
//
// $s       UTF-8 encoded input; it is cast to string first
// returns  byte string in which every input character is represented by
//          - the identical byte, for code points below 0x100,
//          - one or two letters from $utf_xlatin, for code points covered by that table,
//          - "...", ".." or "Euro" for U+2026, U+2025 and U+20AC,
//          - one character from $utf_punct, for code points 0x2000 to 0x203F
//            covered by that table,
//          - "?" for everything else, including four-byte sequences and
//            sequences that are cut off at the end of the input.
//
// Bytes below 0xC0 (ASCII and stray continuation bytes) are copied unchanged.
// Continuation bytes of two- and three-byte sequences are not validated; only
// their low six bits are used.
function utf8ToIso88591($s)
{
    global $utf_xlatin, $utf_punct;

    $s = (string) $s;
    $len = strlen($s);

    // Table sizes are checked on every lookup so that a short or missing
    // table yields "?" instead of reading an undefined string offset.
    $xlatinLen = is_string($utf_xlatin) ? strlen($utf_xlatin) : 0;
    $punctLen = is_string($utf_punct) ? strlen($utf_punct) : 0;

    $pos = 0;
    $result = "";

    while ($pos < $len)
    {
        $c1 = ord($s[$pos++]);

        if ($c1 < 0xC0)
        {
            // single byte: pass through
            $result .= chr($c1);
        }
        else if ($c1 >= 0xF0)
        {
            // Lead byte of a four-byte sequence (or an invalid lead byte).
            // Nothing in that range has a replacement, so skip the following
            // continuation bytes (at most three) and emit one placeholder.
            // Without this the trailing byte would leak into the output.
            $skipped = 0;
            while ($skipped < 3 && $pos < $len && (ord($s[$pos]) & 0xC0) == 0x80)
            {
                $pos++;
                $skipped++;
            }
            $result .= "?";
        }
        else if ($pos >= $len)
        {
            // multi-byte sequence cut off after the lead byte
            $result .= "?";
        }
        else
        {
            $c2 = ord($s[$pos++]);

            if ($c1 < 0xE0)
            {
                // two-byte sequence
                $code = 0x40 * ($c1 & 0x1F) + ($c2 & 0x3F);
                $idx = 2 * ($code - 0x100);

                if ($code < 0x100)
                    $result .= chr($code);
                else if ($code < 0x200 && $idx + 1 < $xlatinLen)
                {
                    $result .= $utf_xlatin[$idx];
                    // a blank in the second column means "single letter replacement"
                    if ($utf_xlatin[$idx + 1] != ' ')
                        $result .= $utf_xlatin[$idx + 1];
                }
                else
                    $result .= "?";
            }
            else if ($pos >= $len)
            {
                // three-byte sequence cut off after the second byte
                $result .= "?";
            }
            else
            {
                // three-byte sequence
                $c3 = ord($s[$pos++]);
                $code = 0x1000 * ($c1 & 0x0F) + 0x40 * ($c2 & 0x3F) + ($c3 & 0x3F);

                switch ($code)
                {
                    case 0x2026: $result .= "..."; break;
                    case 0x2025: $result .= ".."; break;
                    case 0x20AC: $result .= "Euro"; break;
                    default:
                        $idx = $code - 0x2000;
                        if ($idx >= 0 && $idx <= 0x3F && $idx < $punctLen)
                            $result .= $utf_punct[$idx];
                        else
                            $result .= "?";
                }
            }
        }
    }

    return $result;
}
?>

View File

@ -41,6 +41,7 @@
<li class="changelogitem">Empfehlungen gehen beim mehrfachen Loggen eines Caches &ndash; z.B. Fund + Hinweis &ndash; nicht mehr verloren.</li>
<li class="changelogitem">Empfehlungen gehen beim Löschen eines von mehreren Logs des gleichen Benutzers oder beim Bearbeiten von einem der Logs nicht mehr verloren.</li>
<li class="changelogitem">Mehrfachlogs eines Benutzers zählen bei der Bewertungsübersicht auf der Startseite nur noch einmal.</li>
<li class="changelogitem"><a href="doc/xml/">Das XML-Interface</a> schneidet im Standardzeichensatz keine Daten mehr bei unbekannten Zeichen ab.</li>
</ul>
<br />

View File

@ -40,6 +40,7 @@
<li class="changelogitem">Recommendations are no longer lost when logging a cache again, e.g. a note after a found log.</li>
<li class="changelogitem">Recommendations are no longer lost when deleting one of multiple logs of the same user, or when editing one of them.</li>
<li class="changelogitem">Multiple logs by the same user only count once in the homepage top ratings list.</li>
<li class="changelogitem"><a href="doc/xml/">XML-Interface</a> will not truncate default charset data at unknown characters.</li>
</ul>
<br />

View File

@ -14,6 +14,7 @@
$opt['rootpath'] = '../';
require($opt['rootpath'] . 'lib/common.inc.php');
require($opt['rootpath'] . 'lib/charset.inc.php');
if ($error == true)
{
echo 'Unable to connect to database';
@ -267,7 +268,7 @@
if ($bOcXmlTag == '1') $xmloutput .= '</ocxmlsession>';
if ($sCharset == 'iso-8859-1')
echo iconv('UTF-8', 'ISO-8859-1', $xmloutput);
echo utf8ToIso88591($xmloutput);
else if ($sCharset == 'utf-8')
echo $xmloutput;
@ -293,7 +294,7 @@
function outputXmlFile($sessionid, $filenr, $bXmlDecl, $bOcXmlTag, $bDocType, $ziptype)
{
global $zip_basedir, $zip_wwwdir, $sDateformat, $sDateshort, $t1, $t2, $safemode_zip, $safemode_zip, $sCharset, $bAttrlist, $absolute_server_URI;
global $zip_basedir, $zip_wwwdir, $sDateformat, $sDateshort, $t1, $t2, $t3, $safemode_zip, $safemode_zip, $sCharset, $bAttrlist, $absolute_server_URI;
// alle records aus tmpxml_* übertragen
if (!mb_ereg_match('^[0-9]{1,11}', $sessionid))
@ -376,7 +377,7 @@ function outputXmlFile($sessionid, $filenr, $bXmlDecl, $bOcXmlTag, $bDocType, $z
$fileid++;
$xmlfilename = $zip_basedir . 'ocxml11/' . $sessionid . '/' . $sessionid . '-' . $filenr . '-' . $fileid . '.xml';
$f = fopen($xmlfilename, 'w');
if ($bXmlDecl == '1')
@ -595,7 +596,7 @@ function outputXmlFile($sessionid, $filenr, $bXmlDecl, $bOcXmlTag, $bDocType, $z
// zippen und url-redirect
if ($ziptype == '0')
{
tpl_redirect($zip_wwwdir . 'ocxml11/' . $sessionid . '/' . $sessionid . '-' . $filenr . '-' . $fileid . '.xml');
tpl_redirect($zip_wwwdir . $rel_xmlfile);
exit;
}
else if ($ziptype == 'zip')
@ -916,8 +917,8 @@ function xmlcdata($str)
if ($bXmlCData == '1')
{
$str = mb_ereg_replace(']]>', ']] >', $str);
$str = output_convert($str);
$str = mb_ereg_replace(']]>', ']] >', $str);
return '<![CDATA[' . filterevilchars($str) . ']]>';
}
else
@ -1037,12 +1038,9 @@ function output_convert($str)
if ($sCharset == 'iso-8859-1')
{
if ($str != null)
{
$str = @iconv('UTF-8', 'ISO-8859-1', $str);
if ($str == false)
$str = '--- charset conversion error ---';
}
return $str;
return utf8ToIso88591($str);
else
return $str;
}
else if ($sCharset == 'utf-8')
return $str;