feat: Add SJSON library

2021-07-26 20:12:10 +02:00 · 2021-07-26 20:12:10 +02:00 · 64a15a0274
commit 64a15a0274
parent 2e7282956f
2 changed files with 575 additions and 0 deletions
--- a/addons/bitsquid/sjson/LICENSE.txt
+++ b/addons/bitsquid/sjson/LICENSE.txt
@ -0,0 +1,23 @@
+Copyright (c) 2014-2018, Matthäus G. Chajdas
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/addons/bitsquid/sjson/init.py
+++ b/addons/bitsquid/sjson/init.py
@ -0,0 +1,552 @@
+"""Module to parse SJSON files."""
+# coding=utf8
+# @author: Matthäus G. Chajdas
+# @license: 2-clause BSD
+
+import collections.abc
+import collections
+import numbers
+import string
+import io
+from enum import Enum
+
+# Version of this SJSON module, exposed as a plain string.
+__version__ = '2.1.0'
+
+
+class MemoryInputStream:
+    """Sequential reader over a bytes object held entirely in memory."""
+
+    def __init__(self, s):
+        """Wrap ``s`` and start reading at its first byte.
+
+        s -- a bytes object.
+        """
+        self._stream = s
+        self._length = len(s)
+        self._current_index = 0
+
+    def read(self, count=1):
+        """Consume and return the next ``count`` bytes.
+
+        An end-of-stream error is reported if fewer than ``count`` bytes
+        remain.
+        """
+        start = self._current_index
+        stop = start + count
+        if stop > self._length:
+            _raise_end_of_file_exception(self)
+        chunk = self._stream[start:stop]
+        self._current_index = stop
+        return chunk
+
+    def peek(self, count=1, allow_end_of_file=False):
+        """Return the next ``count`` bytes without consuming them.
+
+        When fewer than ``count`` bytes remain, ``None`` is returned if
+        ``allow_end_of_file`` is ``True``; otherwise an end-of-stream error
+        is reported.
+        """
+        start = self._current_index
+        stop = start + count
+        if stop <= self._length:
+            return self._stream[start:stop]
+        if allow_end_of_file:
+            return None
+        _raise_end_of_file_exception(self)
+        return self._stream[start:stop]
+
+    def skip(self, count=1):
+        """Advance the read position by ``count`` bytes (not range checked)."""
+        self._current_index = self._current_index + count
+
+    def get_location(self):
+        """Return the current position as a ``(line, column)`` named tuple.
+
+        Both values start at 1 and are derived by scanning every byte in
+        front of the current read position.
+        """
+        location_type = collections.namedtuple('Location', ['line', 'column'])
+        # Iterating over bytes yields integers, hence the ord() comparison.
+        newline = ord('\n')
+        line, column = 1, 1
+        for value in self._stream[:self._current_index]:
+            if value != newline:
+                column += 1
+                continue
+            line += 1
+            column = 1
+        return location_type(line, column)
+
+
+class ByteBufferInputStream:
+    """Input stream wrapper for reading directly from an I/O object.
+
+    Bytes that have been inspected with ``peek`` but not consumed yet are
+    kept in a small look-ahead buffer owned by this class. Only ``read`` is
+    called on the wrapped object, so a multi-byte ``peek`` does not depend on
+    how much data the wrapped object happens to have buffered.
+    """
+
+    # Created once instead of on every get_location() call.
+    _Location = collections.namedtuple('Location', ['line', 'column'])
+
+    def __init__(self, stream):
+        """
+        stream -- a binary I/O object providing ``read(count)``.
+        """
+        self._stream = stream
+        # Never updated by this class; the attribute is kept unchanged.
+        self._index = 0
+        self._line = 1
+        self._column = 1
+        # Bytes fetched from ``stream`` that were not consumed yet.
+        self._lookahead = b''
+
+    def _fill_lookahead(self, count):
+        """Try to buffer at least ``count`` bytes; stop early at end of data."""
+        while len(self._lookahead) < count:
+            chunk = self._stream.read(count - len(self._lookahead))
+            if not chunk:
+                break
+            self._lookahead += chunk
+
+    def read(self, count=1):
+        """read ``count`` bytes from the stream.
+
+        An end-of-stream error is reported if fewer than ``count`` bytes are
+        available. The line/column position is advanced for the bytes read.
+        """
+        self._fill_lookahead(count)
+        if len(self._lookahead) < count:
+            _raise_end_of_file_exception(self)
+
+        result = self._lookahead[:count]
+        self._lookahead = self._lookahead[count:]
+
+        for char in result:
+            # We test the individual bytes here, must use ord
+            if char == ord('\n'):
+                self._line += 1
+                self._column = 1
+            else:
+                self._column += 1
+        return result
+
+    def peek(self, count=1, allow_end_of_file=False):
+        """peek ``count`` bytes from the stream. If ``allow_end_of_file`` is
+        ``True``, no error will be raised if the end of the stream is reached
+        while trying to peek.
+
+        If some, but fewer than ``count``, bytes remain, the remaining bytes
+        are returned. If nothing remains, ``None`` is returned when
+        ``allow_end_of_file`` is ``True`` and an end-of-stream error is
+        reported otherwise."""
+        self._fill_lookahead(count)
+        result = self._lookahead[:count]
+        if result:
+            return result
+        if allow_end_of_file:
+            return None
+        _raise_end_of_file_exception(self)
+
+    def skip(self, count=1):
+        """skip ``count`` bytes."""
+        self.read(count)
+
+    def get_location(self):
+        """Get the current location in the stream."""
+        return self._Location(self._line, self._column)
+
+
+class ParseException(RuntimeError):
+    """Error raised when the input cannot be parsed.
+
+    Stores the message together with the stream location it refers to.
+    """
+
+    def __init__(self, msg, location):
+        """
+        msg -- description of the problem.
+        location -- object exposing ``line`` and ``column`` attributes.
+        """
+        super().__init__(msg)
+        self._location = location
+        self._msg = msg
+
+    def get_location(self):
+        """Return the location that was handed to the constructor."""
+        return self._location
+
+    def __str__(self):
+        where = self._location
+        template = '{} at line {}, column {}'
+        return template.format(self._msg, where.line, where.column)
+
+
+def _raise_end_of_file_exception(stream):
+    """Raise a ``ParseException`` for an unexpected end of ``stream``.
+
+    The exception carries the location reported by ``stream``.
+    """
+    location = stream.get_location()
+    raise ParseException('Unexpected end-of-stream', location)
+
+
+def _consume(stream, what):
+    """Skip leading whitespace and comments, then consume ``what``.
+
+    stream -- input stream providing ``peek``, ``skip`` and ``get_location``.
+    what -- the exact bytes expected next in the stream.
+
+    Raises ``ParseException`` if the upcoming bytes differ from ``what``.
+    """
+    _skip_whitespace(stream)
+    what_len = len(what)
+    if stream.peek(what_len) != what:
+        # Decode bytes so the message reads "Expected to read 'true'" rather
+        # than "Expected to read 'b'true''".
+        if isinstance(what, (bytes, bytearray)):
+            expected = bytes(what).decode('utf-8', errors='replace')
+        else:
+            expected = what
+        raise ParseException("Expected to read '{}'".format(expected),
+                             stream.get_location())
+    stream.skip(what_len)
+
+
+def _skip_characters_and_whitespace(stream, num_char_to_skip):
+    """Skip ``num_char_to_skip`` bytes, then any whitespace and comments.
+
+    Returns whatever ``_skip_whitespace`` returns for the new position.
+    """
+    stream.skip(num_char_to_skip)
+    upcoming = _skip_whitespace(stream)
+    return upcoming
+
+
+# Single-byte values the parser treats as whitespace.
+_WHITESPACE_SET = {b'\t', b'\n', b'\r', b' '}
+
+
+def _is_whitespace(char):
+    """Return ``True`` if ``char`` is one of the whitespace bytes."""
+    is_member = char in _WHITESPACE_SET
+    return is_member
+
+
+def _skip_c_style_comment(stream):
+    """Skip a ``/* ... */`` comment that starts at the current position.
+
+    stream -- input stream providing ``peek``, ``skip`` and ``get_location``.
+
+    On return the stream is positioned directly after the closing ``*/``.
+    Raises ``ParseException``, pointing at the start of the comment, if the
+    stream ends before a closing ``*/`` is found.
+    """
+    comment_start_location = stream.get_location()
+    # skip the comment start
+    stream.skip(2)
+    # we don't support nested comments, so we're not going to
+    # count the nesting level. Instead, skip ahead until we
+    # find a closing */
+    while True:
+        next_char = stream.peek(1, allow_end_of_file=True)
+        if next_char is None:
+            raise ParseException("Could not find closing '*/' for comment",
+                                 comment_start_location)
+        if next_char == b'*' and \
+                stream.peek(2, allow_end_of_file=True) == b'*/':
+            stream.skip(2)
+            return
+        # Advance by exactly one byte. A '*' that does not start '*/' must
+        # not swallow the byte after it, otherwise '**/' is never matched.
+        stream.skip()
+
+
+def _skip_cpp_style_comment(stream):
+    """Skip a ``//`` comment up to, but not including, the line break.
+
+    Stops at the end of the stream if no line break follows.
+    """
+    # step over the two comment-start bytes
+    stream.skip(2)
+    upcoming = stream.peek(allow_end_of_file=True)
+    while upcoming is not None and upcoming != b'\n':
+        stream.skip()
+        upcoming = stream.peek(allow_end_of_file=True)
+
+
+def _skip_whitespace(stream):
+    """Skip whitespace as well as ``/* */`` and ``//`` comments.
+
+    Returns the first byte that was not skipped (it is left in the stream),
+    or ``None`` if the end of the stream was reached."""
+    while True:
+        upcoming = stream.peek(allow_end_of_file=True)
+        if _is_whitespace(upcoming):
+            stream.skip()
+            continue
+        if upcoming != b'/':
+            return upcoming
+        # A slash may open a C or C++ style comment
+        opener = stream.peek(2, allow_end_of_file=True)
+        if opener == b'/*':
+            _skip_c_style_comment(stream)
+        elif opener == b'//':
+            _skip_cpp_style_comment(stream)
+        else:
+            return upcoming
+
+
+# Characters that may appear in an unquoted identifier.
+_IDENTIFIER_SET = set(string.ascii_letters) | set(string.digits) | {'_'}
+
+
+def _is_identifier(obj):
+    """Return ``True`` if the first byte of ``obj`` is an identifier
+    character (ASCII letter, digit or underscore)."""
+    leading = chr(obj[0])
+    return leading in _IDENTIFIER_SET
+
+
+def _decode_escaped_character(char):
+    """Return the byte(s) denoted by ``char`` when it follows a backslash.
+
+    ``b``, ``n`` and ``t`` map to backspace, newline and tab. A backslash or
+    a double quote stands for itself. Anything else is not a known escape
+    sequence and is returned with the backslash put back in front of it
+    (i.e. \\l stays \\l).
+    """
+    control_escapes = ((b'b', b'\b'), (b'n', b'\n'), (b't', b'\t'))
+    for escaped, decoded in control_escapes:
+        if char == escaped:
+            return decoded
+    if char == b'\\' or char == b'\"':
+        return char
+    return b'\\' + char
+
+
+class RawQuoteStyle(Enum):
+    """Delimiter styles for raw string literals."""
+    # Raw string delimited by [=[ and ]=]
+    Lua = 1
+    # Raw string delimited by three double quotes
+    Python = 2
+
+
+def _decode_string(stream, allow_identifier=False):
+    """Parse a string, or an unquoted identifier if permitted.
+
+    Leading whitespace and comments are skipped first. Three quoted forms
+    are recognised: a regular double-quoted string with backslash escapes, a
+    raw string delimited by three double quotes, and a raw string delimited
+    by ``[=[`` and ``]=]``. Raw strings are taken verbatim.
+
+    stream -- input stream providing ``peek``, ``read``, ``skip`` and
+              ``get_location``.
+    allow_identifier -- if ``True``, input that does not start with a quote
+                        is read as an identifier (letters, digits, ``_``).
+
+    Returns the decoded text as ``str``; the collected bytes are decoded as
+    UTF-8. Raises ``ParseException`` if a quoted string is required but not
+    present, or if a ``[`` does not start ``[=[``.
+    """
+    # When we enter here, we either start with " or [, or there is no quoting
+    # enabled.
+    _skip_whitespace(stream)
+
+    result = bytearray()
+
+    first_char = stream.peek()
+    is_quoted = first_char == b'\"' or first_char == b'['
+    if not allow_identifier and not is_quoted:
+        raise ParseException('Quoted string expected', stream.get_location())
+
+    raw_quotes = None
+    if is_quoted:
+        # Probe the opening delimiter without insisting on three available
+        # bytes: a short string such as "" at the very end of the input has
+        # fewer than three bytes left and must still be accepted.
+        opening = stream.peek(3, allow_end_of_file=True)
+        # Try Python-style, """ delimited strings
+        if opening == b'\"\"\"':
+            stream.skip(3)
+            raw_quotes = RawQuoteStyle.Python
+        # Try Lua-style, [=[ delimited strings
+        elif opening == b'[=[':
+            stream.skip(3)
+            raw_quotes = RawQuoteStyle.Lua
+        elif first_char == b'\"':
+            stream.skip()
+        else:
+            # A '[' that is not the start of '[=['
+            raise ParseException('Invalid quoted string, must start with ",'
+                                 '""", or [=[',
+                                 stream.get_location())
+
+    parse_as_identifier = not is_quoted
+
+    while True:
+        next_char = stream.peek()
+        if parse_as_identifier and not _is_identifier(next_char):
+            break
+
+        if raw_quotes:
+            if raw_quotes == RawQuoteStyle.Python and \
+                    next_char == b'\"' and stream.peek(3) == b'\"\"\"':
+                # This is a tricky case -- we're in a """ quoted string, and
+                # we spotted three consecutive """. This could mean we're at the
+                # end, but it doesn't have to be -- we actually need to check
+                # all the cases below:
+                #   * """: simple case, just end here
+                #   * """": A single quote inside the string,
+                #     followed by the end marker
+                #   * """"": A double double quote inside the string,
+                #     followed by the end marker
+                # Note that """""" is invalid, no matter what follows
+                # afterwards, as the first group of three terminates the string,
+                # and then we'd have an unrelated string afterwards. We don't
+                # concat strings automatically so this will trigger an error
+                # Start with longest match, as the other is prefix this has
+                # to be the first check
+                if stream.peek(5, allow_end_of_file=True) == b'\"\"\"\"\"':
+                    result += b'\"\"'
+                    stream.skip(5)
+                    break
+                elif stream.peek(4, allow_end_of_file=True) == b'\"\"\"\"':
+                    result += next_char
+                    stream.skip(4)
+                    break
+                stream.skip(3)
+                break
+            elif raw_quotes == RawQuoteStyle.Lua and \
+                    next_char == b']' and stream.peek(3) == b']=]':
+                stream.skip(3)
+                break
+            else:
+                result += next_char
+                stream.skip(1)
+        else:
+            if next_char == b'\"':
+                stream.read()
+                break
+            elif next_char == b'\\':
+                # Drop the backslash and decode the byte that follows it
+                stream.skip()
+                result += _decode_escaped_character(stream.read())
+            else:
+                result += next_char
+                stream.skip()
+
+    return str(result, encoding='utf-8')
+
+
+# Bytes that terminate a number; ``None`` stands for the end of the stream.
+_NUMBER_SEPARATOR_SET = _WHITESPACE_SET | {b',', b']', b'}', None}
+
+
+def _decode_number(stream, next_char):
+    """Read a number from ``stream`` and convert it.
+
+    next_char -- the next byte in the stream.
+
+    Bytes are collected until a separator or the end of the stream is seen.
+    The text is converted with ``float`` if it contains ``.``, ``e`` or
+    ``E`` and with ``int`` otherwise; a ``ValueError`` from that conversion
+    is passed on to the caller.
+    """
+    digits = bytearray()
+    saw_float_marker = False
+
+    while next_char not in _NUMBER_SEPARATOR_SET:
+        if next_char in (b'.', b'e', b'E'):
+            saw_float_marker = True
+
+        digits += next_char
+        stream.skip()
+        next_char = stream.peek(allow_end_of_file=True)
+
+    text = digits.decode('utf-8')
+    return float(text) if saw_float_marker else int(text)
+
+
+def _decode_dict(stream, delimited=False):
+    """Parse key/value pairs into an ordered dictionary.
+
+    A key is a quoted string or an identifier. It may be separated from its
+    value by ``=`` or ``:``, and pairs may be separated by ``,``. A leading
+    ``{`` is consumed if present.
+
+    stream -- input stream providing ``peek``, ``skip`` and ``get_location``.
+    delimited -- if ``True``, parsing will stop once the end-of-dictionary
+                 delimiter has been reached(``}``)
+
+    If ``delimited`` is ``False``, parsing also stops at the end of the
+    stream, so completely empty input yields an empty dictionary.
+
+    Returns a ``collections.OrderedDict`` in input order.
+    """
+    result = collections.OrderedDict()
+
+    # Tolerate end-of-stream here: empty input is handled by the
+    # end-of-stream check at the top of the loop below.
+    if stream.peek(allow_end_of_file=True) == b'{':
+        stream.skip()
+
+    next_char = _skip_whitespace(stream)
+
+    while True:
+        if not delimited and next_char is None:
+            break
+
+        if next_char == b'}':
+            stream.skip()
+            break
+
+        key = _decode_string(stream, True)
+        next_char = _skip_whitespace(stream)
+        # We allow both '=' and ':' as separators inside maps
+        if next_char == b'=' or next_char == b':':
+            _consume(stream, next_char)
+        value = _parse(stream)
+        result[key] = value
+
+        next_char = _skip_whitespace(stream)
+        if next_char == b',':
+            next_char = _skip_characters_and_whitespace(stream, 1)
+
+    return result
+
+
+def _parse_list(stream):
+    """Parse a ``[...]`` list whose opening bracket is the next byte.
+
+    Elements are parsed with ``_parse``; a ``,`` between elements is
+    accepted but not required. Returns the elements as a ``list``.
+    """
+    items = []
+    # step over the opening '['
+    upcoming = _skip_characters_and_whitespace(stream, 1)
+
+    while upcoming != b']':
+        items.append(_parse(stream))
+
+        upcoming = _skip_whitespace(stream)
+        if upcoming == b',':
+            upcoming = _skip_characters_and_whitespace(stream, 1)
+
+    # consume the closing ']'
+    stream.skip()
+    return items
+
+
+def _parse(stream):
+    """Parse a single value and return it.
+
+    The first significant byte selects the kind of value: ``true``,
+    ``false``, ``null``, a ``{`` dictionary, a quoted or raw string, or a
+    ``[`` list. Everything else is parsed as a number.
+
+    Raises ``ParseException`` ('Invalid character') if the number
+    conversion fails.
+    """
+    next_char = _skip_whitespace(stream)
+
+    keywords = (
+        (b't', b'true', True),
+        (b'f', b'false', False),
+        (b'n', b'null', None),
+    )
+    for first_byte, literal, value in keywords:
+        if next_char == first_byte:
+            _consume(stream, literal)
+            return value
+
+    if next_char == b'{':
+        return _decode_dict(stream, True)
+    if next_char == b'\"':
+        return _decode_string(stream)
+    if next_char == b'[':
+        lookahead = stream.peek(2, allow_end_of_file=False)
+        # '[=' introduces a [=[ ]=] raw string, any other '[' opens a list
+        if lookahead[1:2] == b'=':
+            return _decode_string(stream)
+        return _parse_list(stream)
+
+    try:
+        return _decode_number(stream, next_char)
+    except ValueError:
+        raise ParseException('Invalid character', stream.get_location())
+
+
+def load(stream):
+    """Parse SJSON from ``stream`` and return the resulting dictionary.
+
+    ``stream`` is wrapped in an ``io.BufferedReader`` before parsing.
+    """
+    buffered = io.BufferedReader(stream)
+    wrapped = ByteBufferInputStream(buffered)
+    return _decode_dict(wrapped)
+
+
+def loads(text):
+    """Parse SJSON from the string ``text`` and return the dictionary.
+
+    The text is encoded as UTF-8 before parsing.
+    """
+    encoded = text.encode('utf-8')
+    wrapped = MemoryInputStream(encoded)
+    return _decode_dict(wrapped)
+
+
+def dumps(obj, indent=None):
+    """Serialize ``obj`` and return the result as a string.
+
+    ``indent`` is passed on to ``dump`` unchanged.
+    """
+    buffer = io.StringIO()
+    dump(obj, buffer, indent)
+    return buffer.getvalue()
+
+
+def dump(obj, fp, indent=None):
+    """Serialize ``obj`` and write it to ``fp`` piece by piece.
+
+    obj -- the object to serialize.
+    fp -- object with a ``write`` method accepting strings.
+    indent -- a falsy value for no indentation, a number of spaces
+              (negative numbers count as 0), or the string to use for one
+              indentation level.
+    """
+    if not indent:
+        prefix = ''
+    elif isinstance(indent, numbers.Number):
+        width = 0 if indent < 0 else indent
+        prefix = ' ' * width
+    else:
+        prefix = indent
+
+    for chunk in _encode(obj, indent=prefix):
+        fp.write(chunk)
+
+
+# Characters that are written as escape sequences. The backslash is part of
+# the table: without it, a literal backslash followed by e.g. ``n`` would be
+# read back as a newline.
+_ESCAPE_CHARACTER_SET = {
+    '\\': '\\\\',
+    '\n': '\\n',
+    '\b': '\\b',
+    '\t': '\\t',
+    '\"': '\\"',
+}
+# Translation table so that every character is escaped exactly once, in a
+# single pass; chained replace() calls could re-escape their own output.
+_ESCAPE_TRANSLATION = str.maketrans(_ESCAPE_CHARACTER_SET)
+
+
+def _escape_string(obj, quote=True):
+    """Escape a string.
+
+    If quote is set, the string will be returned with quotation marks at the
+    beginning and end. If quote is set to false, quotation marks will be only
+    added if needed(that is, if the string is empty or not an identifier.)
+
+    This is a generator: it yields the opening quote (if any), the escaped
+    text and the closing quote (if any) as separate strings."""
+    if not obj or any(c not in _IDENTIFIER_SET for c in obj):
+        # String must be quoted, even if quote was not requested. This
+        # includes the empty string, which would otherwise produce no output.
+        quote = True
+
+    if quote:
+        yield '"'
+
+    yield obj.translate(_ESCAPE_TRANSLATION)
+
+    if quote:
+        yield '"'
+
+
+def _encode(obj, separators=(', ', '\n', ' = '), indent=0, level=0):
+    """Yield the pieces of the serialized form of ``obj``.
+
+    Handles ``None``, booleans, numbers, strings, sequences and mappings.
+    Raises ``RuntimeError`` for any other type.
+    """
+    if obj is None:
+        yield 'null'
+        return
+    # Booleans come before the number check: they are Number instances and
+    # str() would produce True/False rather than true/false.
+    if obj is True:
+        yield 'true'
+        return
+    if obj is False:
+        yield 'false'
+        return
+    if isinstance(obj, numbers.Number):
+        yield str(obj)
+        return
+    # Strings come before the sequence check: they are Sequences too but
+    # must not be written as lists.
+    if isinstance(obj, str):
+        yield from _escape_string(obj)
+        return
+    if isinstance(obj, collections.abc.Sequence):
+        yield from _encode_list(obj, separators, indent, level)
+        return
+    if isinstance(obj, collections.abc.Mapping):
+        yield from _encode_dict(obj, separators, indent, level)
+        return
+    raise RuntimeError("Unsupported object type")
+
+
+def _indent(level, indent):
+    """Return ``indent`` repeated ``level`` times."""
+    repeated = indent * level
+    return repeated
+
+
+def _encode_key(k):
+    """Yield the pieces of dictionary key ``k``, without requesting
+    quotation marks (``_escape_string`` still adds them when needed)."""
+    for piece in _escape_string(k, False):
+        yield piece
+
+
+def _encode_list(obj, separators, indent, level):
+    """Yield the pieces of ``obj`` written as ``[a, b, ...]``.
+
+    Elements are joined with ``separators[0]`` and encoded one nesting
+    level deeper.
+    """
+    yield '['
+    for position, element in enumerate(obj):
+        if position:
+            yield separators[0]
+        yield from _encode(element, separators, indent, level + 1)
+    yield ']'
+
+
+def _encode_dict(obj, separators, indent, level):
+    """Yield the pieces of mapping ``obj``, one key/value pair per line.
+
+    Keys and values are joined with ``separators[2]``. Braces are written
+    only for nested dictionaries (``level`` greater than 0); the root
+    dictionary is written without them.
+    """
+    nested = level > 0
+    if nested:
+        yield '{\n'
+    for position, (key, value) in enumerate(obj.items()):
+        if position:
+            yield '\n'
+        yield _indent(level, indent)
+        yield from _encode_key(key)
+        yield separators[2]
+        yield from _encode(value, separators, indent, level + 1)
+    yield '\n'
+    # The closing brace sits one level further out than the keys
+    yield _indent(level - 1, indent)
+    if nested:
+        yield '}'