#include "Python.h"
#include "expat.h"
#include "expat_config.h"
#include "xmlchar.h"

#ifdef XML_UNICODE /* Expat uses UTF-16 */
/* Decode `size` UTF-16 code units starting at `str` into a Python unicode
   object.  Returns whatever PyUnicode_DecodeUTF16() returns. */
PyObject *Unicode_FromXMLCharAndSize(const XML_Char *str, int size)
{
  int byteorder;

  /* PyUnicode_DecodeUTF16 byteorder convention: -1 is little-endian,
     1 is big-endian.  Pick the one matching the configured BYTEORDER. */
  if (BYTEORDER == 1234)
    byteorder = -1;
  else
    byteorder = 1;

  /* The decoder wants a byte count; each code unit is two bytes. */
  return PyUnicode_DecodeUTF16((const char *)str, 2 * size, NULL, &byteorder);
}

/* Decode the zero-terminated UTF-16 string `str` into a Python unicode
   object.  Returns whatever PyUnicode_DecodeUTF16() returns. */
PyObject *Unicode_FromXMLChar(const XML_Char *str)
{
  const XML_Char *end;
  int byteorder;

  /* Locate the terminating zero code unit. */
  for (end = str; *end != 0; end++)
    ;

  /* PyUnicode_DecodeUTF16 byteorder convention: -1 is little-endian,
     1 is big-endian. */
  if (BYTEORDER == 1234)
    byteorder = -1;
  else
    byteorder = 1;

  /* Two bytes per code unit. */
  return PyUnicode_DecodeUTF16((const char *)str, (end - str) * 2, NULL,
                               &byteorder);
}

/*
 * Encode `size` Py_UNICODE code units starting at `s` as a zero-terminated
 * UTF-16 XML_Char string for Expat.
 *
 * Returns a buffer obtained from malloc(), or NULL if the allocation
 * fails.  Nothing in this function releases the buffer.
 */
static XML_Char *XMLChar_FromUnicode(Py_UNICODE *s, int size)
{
  XML_Char *result, *p;
#ifdef Py_UNICODE_WIDE /* UCS-4 */
  int i, pairs;

  /* Each code point outside the BMP expands to two UTF-16 code units,
     so count them up front to size the buffer exactly. */
  for (i = pairs = 0; i < size; i++) {
    if (s[i] >= 0x10000) pairs++;
  }
  result = p = (XML_Char *) malloc((size + pairs + 1) * sizeof(XML_Char));
  if (result == NULL) return NULL;

  while (size-- > 0) {
    Py_UNICODE ch = *s++;
    if (ch >= 0x10000) {
      /* UTF-16 requires the high (lead) surrogate first, followed by the
         low (trail) surrogate. */
      *p++ = (XML_Char)(0xD800 | ((ch - 0x10000) >> 10));
      *p++ = (XML_Char)(0xDC00 | ((ch - 0x10000) & 0x3FF));
    } else {
      *p++ = ch;
    }
  }

#else /* UCS-2 */
  /* Simple one-to-one copy of the Py_UNICODE data */
  result = p = (XML_Char *) malloc((size + 1) * sizeof(XML_Char));
  if (result == NULL) return NULL;

  while (size-- > 0) {
    *p++ = *s++;
  }
#endif

  /* Zero-terminate the XML_Char string for Expat */
  *p = 0;
  return result;
}

/* Return the number of XML_Char units in `s` before its terminating zero. */
size_t XMLChar_Len(const XML_Char *s)
{
  size_t count = 0;

  while (s[count] != 0) {
    count++;
  }
  return count;
}


/* Compare two zero-terminated XML_Char strings unit by unit.
   Returns 0 if they are equal, -1 if the first differing unit of `s1` is
   smaller than that of `s2`, and 1 otherwise. */
int XMLChar_Cmp(const XML_Char *s1, const XML_Char *s2)
{
  for (; *s1 == *s2; s1++, s2++) {
    /* Both strings ended at the same position: they are equal. */
    if (*s1 == 0) return 0;
  }

  /* The loop only exits here on the first mismatching unit. */
  return (*s1 < *s2) ? -1 : 1;
}


/* Compare at most `n` units of two zero-terminated XML_Char strings.
   Returns 0 if the compared prefixes are equal, -1 if the first differing
   unit of `s1` is smaller than that of `s2`, and 1 otherwise. */
int XMLChar_NCmp(const XML_Char *s1, const XML_Char *s2, size_t n)
{
  size_t i;

  for (i = 0; i < n; i++) {
    if (s1[i] != s2[i]) {
      return (s1[i] < s2[i]) ? -1 : 1;
    }
    /* Units match; stop early if both strings end here. */
    if (s1[i] == 0) {
      return 0;
    }
  }

  /* All `n` units matched. */
  return 0;
}


#else /* Expat uses UTF-8 */

/*
 * Encode `size` Py_UNICODE code units starting at `s` as a zero-terminated
 * UTF-8 XML_Char string for Expat.  Valid surrogate pairs in the input are
 * combined into a single 4-byte sequence; an unpaired surrogate is emitted
 * as a 3-byte sequence.
 *
 * Returns a buffer obtained from malloc()/realloc(), or NULL if the
 * allocation fails (or the required size cannot be represented).  Nothing
 * in this function releases the returned buffer.
 */
static XML_Char *XMLChar_FromUnicode(Py_UNICODE *s, int size)
{
  XML_Char *result, *p, *shrunk;
  size_t capacity;
  int i;

  /* Refuse sizes whose worst-case output would overflow size_t.  A
     negative `size` wraps to a huge value in the conversion and is
     rejected here as well. */
  if ((size_t)size > ((size_t)-1 / sizeof(XML_Char) - 1) / 4) return NULL;

  /* Overallocate and give the excess back at the end: at most 4 bytes per
     input unit, plus one for the terminating zero. */
  capacity = (size_t)size * 4 + 1;
  result = p = (XML_Char *) malloc(capacity * sizeof(XML_Char));
  if (result == NULL) return NULL;

  for (i = 0; i < size;) {
    Py_UNICODE ch = s[i++];

    if (ch < 0x80) {
      /* Encode ASCII */
      *p++ = (XML_Char) ch;
    }
    else if (ch < 0x0800) {
      /* Encode U+0080..U+07FF as a two-byte sequence */
      *p++ = (XML_Char)(0xC0 | (ch >> 6));
      *p++ = (XML_Char)(0x80 | (ch & 0x3F));
    }
#ifdef Py_UNICODE_WIDE
    else if (ch >= 0x10000) {
      /* Encode UCS-4 Unicode ordinals */
      *p++ = (XML_Char)(0xF0 | (ch >> 18));
      *p++ = (XML_Char)(0x80 | ((ch >> 12) & 0x3F));
      *p++ = (XML_Char)(0x80 | ((ch >> 6) & 0x3F));
      *p++ = (XML_Char)(0x80 | (ch & 0x3F));
    }
#endif
    else {
      /* Encode UCS-2 Unicode ordinals */
      /* Check for high surrogate */
      if (0xD800 <= ch && ch <= 0xDBFF && i != size) {
        Py_UNICODE ch2 = s[i];
        /* Check for low surrogate and combine the two to
           form a UCS4 value */
        if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
          Py_UCS4 ucs4 = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
          *p++ = (XML_Char)(0xF0 | (ucs4 >> 18));
          *p++ = (XML_Char)(0x80 | ((ucs4 >> 12) & 0x3F));
          *p++ = (XML_Char)(0x80 | ((ucs4 >> 6) & 0x3F));
          *p++ = (XML_Char)(0x80 | (ucs4 & 0x3F));
          i++;
          continue;
        }
        /* Fall through: handles isolated high surrogates */
      }
      *p++ = (XML_Char)(0xE0 | (ch >> 12));
      *p++ = (XML_Char)(0x80 | ((ch >> 6) & 0x3F));
      *p++ = (XML_Char)(0x80 | (ch & 0x3F));
    }
  }

  /* Zero-terminate the XML_Char string for Expat */
  *p++ = 0;

  /* Cut back to the size actually needed.  If the shrink fails the
     original block is still valid, so hand that back rather than leaking
     it and reporting failure. */
  shrunk = (XML_Char *) realloc(result,
                                (size_t)(p - result) * sizeof(XML_Char));
  return (shrunk != NULL) ? shrunk : result;
}
#endif

/* Convert a Python object to a newly allocated XML_Char string by first
   coercing it with PyUnicode_FromObject() and then encoding the result
   with XMLChar_FromUnicode().

   Returns NULL if either step fails.  Note that this function itself does
   not set a Python exception when the encoding step returns NULL. */
XML_Char *XMLChar_FromObject(PyObject *obj)
{
  XML_Char *xmlstr = NULL;
  PyObject *unicode;

  unicode = PyUnicode_FromObject(obj);
  if (unicode != NULL) {
    xmlstr = XMLChar_FromUnicode(PyUnicode_AS_UNICODE(unicode),
                                 PyUnicode_GET_SIZE(unicode));
    /* The temporary unicode object is no longer needed once encoded. */
    Py_DECREF(unicode);
  }
  return xmlstr;
}


/* Write the XML_Char string `s` to `fp` by converting it to a Python
   object with Unicode_FromXMLChar() and printing that object (flags 0).
   The result of PyObject_Print() is not checked. */
void XMLChar_Print(FILE *fp, const XML_Char *s)
{
  PyObject *text;

  text = Unicode_FromXMLChar(s);
  PyObject_Print(text, fp, 0);

  /* The conversion may have failed, so only release a real object. */
  if (text != NULL) {
    Py_DECREF(text);
  }
}
