200 lines
4.8 KiB
C#
200 lines
4.8 KiB
C#
using System;
|
|
using System.Collections;
|
|
using System.Collections.Generic;
|
|
|
|
|
|
namespace UniJSON
|
|
{
|
|
/// <summary>
/// Steps through a UTF-8 encoded byte range one encoded character at a time.
/// <see cref="Current"/> is the lead byte of the character the iterator is
/// positioned on; <see cref="Unicode"/> and <see cref="Char"/> decode it.
/// </summary>
/// <remarks>
/// This is a mutable struct: copying it copies the cursor, so keep it in a local
/// or field and call <see cref="MoveNext"/> on that same variable.
/// </remarks>
public struct Utf8Iterator : IEnumerator<Byte>
{
    // Backing array of the segment handed to the constructor.
    readonly Byte[] m_bytes;
    // Index in m_bytes where the segment begins; BytePosition is relative to it.
    readonly int m_offset;
    // Absolute index in m_bytes where iteration begins (segment offset + start).
    readonly int m_start;
    // Absolute index of the current lead byte, or -1 before the first MoveNext().
    int m_position;
    // Absolute index one past the last byte of the segment.
    readonly int m_end;

    /// <summary>
    /// Creates an iterator over <paramref name="range"/>.
    /// </summary>
    /// <param name="range">The bytes to iterate.</param>
    /// <param name="start">
    /// Number of bytes to skip from the beginning of <paramref name="range"/>
    /// before the first character.
    /// </param>
    public Utf8Iterator(ArraySegment<Byte> range, int start = 0)
    {
        m_bytes = range.Array;
        m_offset = range.Offset;
        m_start = m_offset + start;
        m_position = -1;
        m_end = range.Offset + range.Count;
    }

    /// <summary>
    /// Position of the current lead byte, counted from the beginning of the segment
    /// (not from the beginning of the backing array).
    /// </summary>
    public int BytePosition
    {
        get { return m_position - m_offset; }
    }

    /// <summary>
    /// Number of bytes (1 to 4) in the encoded character, derived from the lead byte only.
    /// </summary>
    /// <remarks>
    /// The classification is by upper bound only, so bytes 0x80-0xBF (which UTF-8
    /// reserves for continuation bytes) are reported as two-byte leads rather than rejected.
    /// </remarks>
    /// <exception cref="Exception">The lead byte is greater than 0xF7.</exception>
    public int CurrentByteLength
    {
        get
        {
            var firstByte = Current;
            if (firstByte <= 0x7F)
            {
                return 1;
            }
            else if (firstByte <= 0xDF)
            {
                return 2;
            }
            else if (firstByte <= 0xEF)
            {
                return 3;
            }
            else if (firstByte <= 0xF7)
            {
                return 4;
            }
            else
            {
                throw new Exception("invalid utf8");
            }
        }
    }

    /// <summary>The byte at the current position (the lead byte of the current character).</summary>
    public byte Current
    {
        get { return m_bytes[m_position]; }
    }

    object IEnumerator.Current
    {
        get { return Current; }
    }

    /// <summary>The byte one past the current position. No bounds check against the segment end.</summary>
    public byte Second
    {
        get { return m_bytes[m_position + 1]; }
    }

    /// <summary>The byte two past the current position. No bounds check against the segment end.</summary>
    public byte Third
    {
        get { return m_bytes[m_position + 2]; }
    }

    /// <summary>The byte three past the current position. No bounds check against the segment end.</summary>
    public byte Fourth
    {
        get { return m_bytes[m_position + 3]; }
    }

    // MaskN keeps the low N bits of a value.
    public const uint Mask1 = 0x01;
    public const uint Mask2 = 0x03;
    public const uint Mask3 = 0x07;
    public const uint Mask4 = 0x0F;
    public const uint Mask5 = 0x1F;
    public const uint Mask6 = 0x3F;
    public const uint Mask7 = 0x7F;
    public const uint Mask11 = 0x07FF;

    // High-bit patterns; not referenced inside this struct
    // (presumably the UTF-8 byte prefixes used by an encoder elsewhere - verify against callers).
    public const uint Head1 = 0x80;
    public const uint Head2 = 0xC0;
    public const uint Head3 = 0xE0;
    public const uint Head4 = 0xF0;

    /// <summary>
    /// Number of UTF-8 bytes needed for a single UTF-16 code unit:
    /// 1 up to 0x7F, 2 up to 0x7FF, otherwise 3.
    /// </summary>
    /// <param name="c">The UTF-16 code unit to measure.</param>
    /// <returns>1, 2 or 3.</returns>
    public static int ByteLengthFromChar(char c)
    {
        if (c <= Mask7)
        {
            return 1;
        }
        else if (c <= Mask11)
        {
            return 2;
        }
        else
        {
            return 3;
        }
    }

    // Rejects a multi-byte character whose trailing bytes would lie beyond the segment,
    // so decoding never reads bytes that do not belong to the iterated range.
    void EnsureSequenceInRange(int length)
    {
        if (m_position + length > m_end)
        {
            throw new Exception("invalid utf8");
        }
    }

    /// <summary>The code point of the current character, decoded from 1 to 4 bytes.</summary>
    /// <exception cref="Exception">
    /// The lead byte is greater than 0xF7, or the sequence runs past the end of the segment.
    /// </exception>
    public uint Unicode
    {
        get
        {
            switch (CurrentByteLength)
            {
                case 1:
                    // 7bit
                    return Current;

                case 2:
                    // 11bit
                    EnsureSequenceInRange(2);
                    return (Mask5 & Current) << 6 | (Mask6 & Second);

                case 3:
                    // 16bit
                    EnsureSequenceInRange(3);
                    return (Mask4 & Current) << 12 | (Mask6 & Second) << 6 | (Mask6 & Third);

                case 4:
                    // 21bit
                    EnsureSequenceInRange(4);
                    return (Mask3 & Current) << 18 | (Mask6 & Second) << 12 | (Mask6 & Third) << 6 | (Mask6 & Fourth);

                default:
                    throw new Exception("invalid utf8");
            }
        }
    }

    /// <summary>
    /// The current character as a single UTF-16 code unit. Only 1- to 3-byte
    /// sequences are supported.
    /// </summary>
    /// <exception cref="NotImplementedException">The current character is a 4-byte sequence.</exception>
    /// <exception cref="Exception">
    /// The lead byte is greater than 0xF7, or the sequence runs past the end of the segment.
    /// </exception>
    public char Char
    {
        get
        {
            switch (CurrentByteLength)
            {
                case 1:
                    // 7bit
                    return (char)Current;

                case 2:
                    // 11bit
                    EnsureSequenceInRange(2);
                    return (char)((Mask5 & Current) << 6 | (Mask6 & Second));

                case 3:
                    // 16bit
                    EnsureSequenceInRange(3);
                    return (char)((Mask4 & Current) << 12 | (Mask6 & Second) << 6 | (Mask6 & Third));

                case 4:
                    // 21bit
                    throw new NotImplementedException();

                default:
                    throw new Exception("invalid utf8");
            }
        }
    }

    /// <summary>Does nothing; the iterator holds no resources.</summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Moves to the first character on the first call, and to the next character
    /// (skipping <see cref="CurrentByteLength"/> bytes) on later calls.
    /// </summary>
    /// <returns>
    /// true while the position is inside the segment; false once the end has been
    /// reached, including on every further call after that.
    /// </returns>
    public bool MoveNext()
    {
        if (m_position == -1)
        {
            m_position = m_start;
        }
        else if (m_position < m_end)
        {
            // Only advance while on a real byte: once past the end there is no
            // lead byte to inspect, and reading one would index outside the segment.
            m_position += CurrentByteLength;
        }
        return m_position < m_end;
    }

    /// <summary>Rewinds so that the next <see cref="MoveNext"/> starts again from the initial position.</summary>
    public void Reset()
    {
        m_position = -1;
    }
}
|
|
}
|