#
################################################################################
# Copyright 2023-2025 by NI SP Software GmbH, All rights reserved.
# Copyright 1999-2023 by Nice, srl., All rights reserved.
#
# This software includes confidential and proprietary information
# of NI SP Software GmbH ("Confidential Information").
# You shall not disclose such Confidential Information
# and shall use it only in accordance with the terms of
# the license agreement you entered into with NI SP Software.
################################################################################
#################################################################################

# --------------------------------------------------------------------------- #
# EF_XML_escape
# =============
# Perform XML escaping for AWK.
#
# Characters that are not valid according to the XML specification are removed, as they could lead to parsing errors.
# In particular, only #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] are considered valid.
# NOTE 1: We don't need to escape the #xD800-#xDFFF because the Unicode standard permanently reserves these code point values
# for UTF-16 encoding of the high and low surrogates and they will never be assigned a character.
# Furthermore, awk doesn't support \uNNNN characters but only \xNN: https://lists.gnu.org/archive/html/sed-devel/2017-01/msg00005.html.
# NOTE 2: We don't need to escape the characters bigger than #xFFFF because they are encoded by UTF-16 using surrogate pairs.
#
# Escaping table
# Source: https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
#         https://owasp.github.io/owasp-java-encoder/encoder/apidocs/org/owasp/encoder/Encode.html#forHtml(java.lang.String)
#
# escapeXmlFull()
# Input     Output
# &         &amp;
# <         &lt;
# >         &gt;
# "         &#x22;
# '         &#x27;
# The encoding of the greater-than sign (>) is not strictly required but it's useful to avoid ']]>' character sequence.
#
# escapeXmlAttribute()
# Input     Output
# &         &amp;
# <         &lt;
# "         &#x22;
# '         &#x27;
# The encoding of the greater-than sign (>) is not required for attributes.
#
# escapeXmlContent()
# Input     Output
# &         &amp;
# <         &lt;
# >         &gt;
# The encoding of the greater-than sign (>) is not strictly required but it's useful to avoid ']]>' character sequence.
# --------------------------------------------------------------------------- #


# Strip control characters from a string.
#   s - text to clean
# Returns s with every character in the ranges \x01-\x08, \x0B, \x0C and
# \x0E-\x1F deleted. Tab (\x09), line feed (\x0A) and carriage return (\x0D)
# fall outside those ranges and are kept.
# Despite the name, matching characters are deleted (substituted with the
# empty string), not replaced by a placeholder.
function _replaceXmlInvalidCharacters(s) {
    # The pattern is passed as a string constant, not a /regex/ literal.
    # NOTE(review): \xNN escapes are not part of POSIX awk - confirm that the
    # awk implementations this file runs under accept them.
    gsub("[\x01-\x08\x0B\x0C\x0E-\x1F]", "", s);
    return s;
}

# Escape a string with the full escaping table: & < > " '.
#   s - text to escape
# Returns the text with & < > " ' turned into &amp; &lt; &gt; &#x22; &#x27;
# and with the control characters matched by _replaceXmlInvalidCharacters()
# stripped out.
function escapeXmlFull(s) {
    # Drop the invalid control characters up front; none of the substitutions
    # below adds or consumes such a character, so the order does not matter.
    s = _replaceXmlInvalidCharacters(s);

    # The ampersand must be handled before anything else: every later
    # replacement inserts a fresh "&" that must not be escaped again.
    # "\\&" in the replacement stands for a literal ampersand.
    gsub(/&/, "\\&amp;", s);

    # Quote characters.
    gsub(/'/, "\\&#x27;", s);
    gsub(/"/, "\\&#x22;", s);

    # Angle brackets.
    gsub(/>/, "\\&gt;", s);
    gsub(/</, "\\&lt;", s);

    return s;
}

# Escape a string with the attribute escaping table: & < " '.
#   s - text to escape
# Returns the text with & < " ' turned into &amp; &lt; &#x22; &#x27; and with
# the control characters matched by _replaceXmlInvalidCharacters() stripped
# out. The greater-than sign is left untouched.
function escapeXmlAttribute(s) {
    # Drop the invalid control characters up front; none of the substitutions
    # below adds or consumes such a character, so the order does not matter.
    s = _replaceXmlInvalidCharacters(s);

    # The ampersand must be handled before anything else: every later
    # replacement inserts a fresh "&" that must not be escaped again.
    # "\\&" in the replacement stands for a literal ampersand.
    gsub(/&/, "\\&amp;", s);

    # Both quote styles, then the opening angle bracket.
    gsub(/'/, "\\&#x27;", s);
    gsub(/"/, "\\&#x22;", s);
    gsub(/</, "\\&lt;", s);

    return s;
}

# Escape a string with the content escaping table: & < >.
#   s - text to escape
# Returns the text with & < > turned into &amp; &lt; &gt; and with the control
# characters matched by _replaceXmlInvalidCharacters() stripped out. Quote
# characters are left untouched.
function escapeXmlContent(s) {
    # Drop the invalid control characters up front; none of the substitutions
    # below adds or consumes such a character, so the order does not matter.
    s = _replaceXmlInvalidCharacters(s);

    # The ampersand must be handled before anything else: every later
    # replacement inserts a fresh "&" that must not be escaped again.
    # "\\&" in the replacement stands for a literal ampersand.
    gsub(/&/, "\\&amp;", s);

    # Angle brackets.
    gsub(/>/, "\\&gt;", s);
    gsub(/</, "\\&lt;", s);

    return s;
}


# --------------------------------------------------------------------------- #


# DEPRECATED!
# Escape & < > as &amp; &lt; &gt; and, on request, " as &quot;.
#   _s     - text to escape
#   _quote - when true, double quotes are escaped as well
# Returns the escaped text. Single quotes and control characters are passed
# through unchanged.
function xml_escape(_s, _quote) {
    # Ampersand first, so the "&" of the entities added below is not escaped
    # a second time. "\\&" in the replacement stands for a literal ampersand.
    gsub(/&/, "\\&amp;", _s)

    if (_quote)
        gsub(/"/, "\\&quot;", _s)

    gsub(/>/, "\\&gt;", _s)
    gsub(/</, "\\&lt;", _s)

    return _s
}


# Build the text of one XML attribute: a leading space, the name, an equals
# sign and the value wrapped in double quotes.
#   _attribute_name  - attribute name, emitted as is (it is not escaped)
#   _attribute_value - attribute value, passed through xml_escape() with
#                      double-quote escaping switched on
# Returns the assembled string, e.g. ` name="value"`.
function xml_attribute(_attribute_name, _attribute_value) {
    return (" " _attribute_name "=" "\"" xml_escape(_attribute_value, 1) "\"");
}


# Turn the entities &lt; &gt; &quot; &amp; back into < > " &.
#   _s - text containing entities
# Returns the decoded text. Any other entity or character reference is left
# as it is.
function xml_unescape(_s) {
    gsub(/&quot;/, "\"", _s)
    gsub(/&gt;/, ">", _s)
    gsub(/&lt;/, "<", _s)

    # &amp; is decoded last, so that text such as "&amp;lt;" ends up as the
    # literal "&lt;" instead of being decoded twice.
    # "\\&" in the replacement stands for a literal ampersand.
    gsub(/&amp;/, "\\&", _s)

    return _s
}


# Rewrite every single quote in a string as the five characters '"'"'.
#   _s - text to rewrite
# Returns the rewritten text. No surrounding quotes are added here.
# NOTE(review): presumably the caller wraps the result in single quotes before
# handing it to a shell - confirm at the call sites.
function sh_escape(_s) {
    # Replacement, piece by piece:  '  then  "'"  then  '
    gsub(/'/, "'" "\"'\"" "'", _s);
    return _s;
}


# Remove leading and trailing blanks (spaces and tabs) from a string.
#   _s - text to trim
# Returns the trimmed text. Other whitespace, such as newlines, is kept.
function trim(_s) {
    # Each pattern is anchored, so it can match at most once per string and a
    # single sub() per side is enough.
    sub(/[\t ]+$/, "", _s);
    sub(/^[\t ]+/, "", _s);
    return _s;
}


# Escape a string so that it can be placed between double quotes in a JSON
# document.
#   _s - text to escape
# Returns the text with
#   - " and \ prefixed by a backslash,
#   - backspace, form feed, newline, carriage return and tab written as
#     \b \f \n \r \t,
#   - every other control character U+0001..U+001F written as \u00XX.
# All remaining characters are copied unchanged.
#
# The parameters after the gap are local variables; callers pass only _s.
# Keeping them local means a caller's own _i / _c are never overwritten.
function json_escape(_s,    _out, _pos, _len, _ch, _ctl, _code, _short) {
    # The control characters U+0001..U+001F in ascending order: the position
    # of a character inside this string is its code point.
    _ctl = "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"

    _out = ""
    _len = length(_s)

    for (_pos = 1; _pos <= _len; _pos++) {
        _ch = substr(_s, _pos, 1)

        if (_ch == "\"" || _ch == "\\") {
            _out = _out "\\" _ch
            continue
        }

        _code = index(_ctl, _ch)
        if (_code == 0) {
            # Not a control character: copy as is.
            _out = _out _ch
            continue
        }

        # JSON does not allow raw control characters inside a string. Use the
        # two-character escape where one exists, \u00XX otherwise.
        _short = index("\b\f\n\r\t", _ch)
        if (_short > 0) {
            _out = _out "\\" substr("bfnrt", _short, 1)
        } else {
            _out = _out sprintf("\\u%04x", _code)
        }
    }

    return _out
}

# URL-encode a string and PRINT the result (nothing is returned).
#   _s - text to encode
# Letters, digits, ".", "_" and "-" are copied unchanged, a space becomes "+",
# and every other character becomes "%" followed by its two-digit upper-case
# hexadecimal code.
#
# The character-to-code lookup lives in the global array `ord`; it is filled
# on the first call and reused afterwards.
# NOTE(review): a character that is missing from `ord` (for example a code
# point above 255 when awk operates on multibyte characters) is emitted as
# "%00" - confirm whether the input is restricted to single-byte text.
#
# The parameters after the gap are local variables; callers pass only _s.
# Keeping them local means a caller's own _i / _c are never overwritten.
function urlencode(_s,    _out, _pos, _len, _ch, _code) {
    # Build the lookup table once; the last entry tells whether it is complete.
    if (!(sprintf("%c", 255) in ord)) {
        for (_code = 1; _code <= 255; _code++) {
            ord[sprintf("%c", _code) ""] = _code + 0
        }
    }

    _out = ""
    _len = length(_s)

    for (_pos = 1; _pos <= _len; _pos++) {
        _ch = substr(_s, _pos, 1)

        if (_ch ~ /[a-zA-Z0-9._-]/) {
            _out = _out _ch          # safe character
        } else if (_ch == " ") {
            _out = _out "+"          # special handling
        } else {
            # unsafe character, encode it as a two-digit hex number
            _out = _out sprintf("%%%02X", ord[_ch])
        }
    }

    print (_out);
}

#
# vi: ts=4 sw=4 et syntax=awk :
#

