2025-04-25 21:14:54 +09:00

200 lines
4.8 KiB
C#

using System;
using System.Collections;
using System.Collections.Generic;
namespace UniJSON
{
/// <summary>
/// Enumerates a UTF-8 encoded byte range one code point at a time.
/// <see cref="Current"/> is the lead byte of the code point the iterator is
/// positioned on; <see cref="MoveNext"/> advances by the encoded length of
/// that code point.
/// </summary>
/// <remarks>
/// This is a mutable struct: copying it copies the cursor, so advance the
/// same variable you read from.
/// </remarks>
public struct Utf8Iterator : IEnumerator<Byte>
{
    // Backing array of the range being iterated.
    readonly Byte[] m_bytes;
    // Index in m_bytes where the range begins (ArraySegment.Offset).
    readonly int m_offset;
    // Index in m_bytes where iteration begins (m_offset + start).
    readonly int m_start;
    // Index in m_bytes of the current lead byte; -1 means "before the first MoveNext".
    int m_position;
    // Index in m_bytes one past the last byte of the range.
    readonly int m_end;

    /// <summary>
    /// Creates an iterator over <paramref name="range"/>.
    /// </summary>
    /// <param name="range">The bytes to iterate.</param>
    /// <param name="start">Offset, relative to the beginning of <paramref name="range"/>, of the first byte to visit.</param>
    public Utf8Iterator(ArraySegment<Byte> range, int start = 0)
    {
        m_bytes = range.Array;
        m_offset = range.Offset;
        m_start = m_offset + start;
        m_position = -1;
        m_end = range.Offset + range.Count;
    }

    /// <summary>
    /// Position of the current lead byte, relative to the beginning of the range.
    /// </summary>
    public int BytePosition
    {
        get { return m_position - m_offset; }
    }

    /// <summary>
    /// Number of bytes (1 to 4) in the sequence introduced by <see cref="Current"/>,
    /// determined from the lead byte alone.
    /// </summary>
    /// <exception cref="FormatException">
    /// The current byte is a continuation byte (0x80-0xBF) or is greater than 0xF7,
    /// neither of which can start a sequence.
    /// </exception>
    public int CurrentByteLength
    {
        get
        {
            var firstByte = Current;
            if (firstByte <= 0x7F)
            {
                return 1;
            }
            if (firstByte < Head2)
            {
                // 0x80-0xBF is the 10xxxxxx continuation pattern; it is never a lead byte.
                throw new FormatException("invalid utf8");
            }
            if (firstByte <= 0xDF)
            {
                return 2;
            }
            if (firstByte <= 0xEF)
            {
                return 3;
            }
            if (firstByte <= 0xF7)
            {
                return 4;
            }
            throw new FormatException("invalid utf8");
        }
    }

    /// <summary>
    /// The byte at the current position (the lead byte of the current sequence).
    /// </summary>
    public byte Current
    {
        get { return m_bytes[m_position]; }
    }

    object IEnumerator.Current
    {
        get { return Current; }
    }

    /// <summary>
    /// The byte one past <see cref="Current"/>. Raw accessor: it is not checked
    /// against the end of the range.
    /// </summary>
    public byte Second
    {
        get { return m_bytes[m_position + 1]; }
    }

    /// <summary>
    /// The byte two past <see cref="Current"/>. Raw accessor: it is not checked
    /// against the end of the range.
    /// </summary>
    public byte Third
    {
        get { return m_bytes[m_position + 2]; }
    }

    /// <summary>
    /// The byte three past <see cref="Current"/>. Raw accessor: it is not checked
    /// against the end of the range.
    /// </summary>
    public byte Fourth
    {
        get { return m_bytes[m_position + 3]; }
    }

    // MaskN selects the low N bits of a value.
    public const uint Mask1 = 0x01;
    public const uint Mask2 = 0x03;
    public const uint Mask3 = 0x07;
    public const uint Mask4 = 0x0F;
    public const uint Mask5 = 0x1F;
    public const uint Mask6 = 0x3F;
    public const uint Mask7 = 0x7F;
    public const uint Mask11 = 0x07FF;

    // Bit patterns 10000000, 11000000, 11100000 and 11110000.
    public const uint Head1 = 0x80;
    public const uint Head2 = 0xC0;
    public const uint Head3 = 0xE0;
    public const uint Head4 = 0xF0;

    /// <summary>
    /// Number of UTF-8 bytes needed for the value of a single UTF-16 code unit.
    /// </summary>
    /// <param name="c">The code unit to measure.</param>
    /// <returns>1 for values up to 0x7F, 2 for values up to 0x7FF, otherwise 3.</returns>
    public static int ByteLengthFromChar(char c)
    {
        if (c <= Mask7)
        {
            return 1;
        }
        if (c <= Mask11)
        {
            return 2;
        }
        return 3;
    }

    /// <summary>
    /// The code point value assembled from the sequence at the current position.
    /// </summary>
    /// <exception cref="FormatException">
    /// The lead byte is invalid, or the sequence it announces extends past the
    /// end of the range.
    /// </exception>
    public uint Unicode
    {
        get
        {
            var l = CurrentByteLength;
            if (m_position + l > m_end)
            {
                // Truncated sequence: refuse to read bytes that lie outside the range.
                throw new FormatException("invalid utf8");
            }
            switch (l)
            {
                case 1:
                    // 7bit
                    return Current;
                case 2:
                    // 11bit
                    return (Mask5 & Current) << 6 | (Mask6 & Second);
                case 3:
                    // 16bit
                    return (Mask4 & Current) << 12 | (Mask6 & Second) << 6 | (Mask6 & Third);
                case 4:
                    // 21bit
                    return (Mask3 & Current) << 18 | (Mask6 & Second) << 12 | (Mask6 & Third) << 6 | (Mask6 & Fourth);
                default:
                    throw new FormatException("invalid utf8");
            }
        }
    }

    /// <summary>
    /// The current code point as a single <see cref="char"/>.
    /// </summary>
    /// <exception cref="NotImplementedException">
    /// The current sequence is 4 bytes long; its 21-bit value does not fit in one char.
    /// </exception>
    /// <exception cref="FormatException">
    /// The lead byte is invalid, or the sequence extends past the end of the range.
    /// </exception>
    public char Char
    {
        get
        {
            if (CurrentByteLength == 4)
            {
                // 21bit
                throw new NotImplementedException();
            }
            // 1 to 3 byte sequences carry at most 16 bits, so the cast is lossless.
            return (char)Unicode;
        }
    }

    /// <summary>
    /// No resources to release.
    /// </summary>
    public void Dispose()
    {
    }

    /// <summary>
    /// Moves to the first code point on the first call, and afterwards to the
    /// lead byte of the following one.
    /// </summary>
    /// <returns>
    /// true while the new position is inside the range; false once the end has
    /// been reached, and on every later call.
    /// </returns>
    /// <exception cref="FormatException">The byte being stepped over is not a valid lead byte.</exception>
    public bool MoveNext()
    {
        if (m_position == -1)
        {
            m_position = m_start;
        }
        else if (m_position < m_end)
        {
            // Only step while still inside the range: once past the end there is
            // no current byte to measure, and the answer stays false.
            m_position += CurrentByteLength;
        }
        return m_position < m_end;
    }

    /// <summary>
    /// Returns the iterator to its initial state, before the first code point.
    /// </summary>
    public void Reset()
    {
        m_position = -1;
    }
}
}