#!/bin/sh

# XML namespace URI of EnginFrame, exported to the environment.
export EF_XMLNS='http://www.enginframe.com/2000/EnginFrame'

# --------------------------------------------------------------------------- #
# EF_XML_escape
# =============
# Perform XML escaping from pipe and command line.
#
# Arguments:
# -i ARG | --input ARG   Input to escape
# -i=ARG | --input=ARG
#          -             Read input from pipe
#          --            Stop command line parsing

# Characters that are not valid according to the XML specification are removed from the input, as they could lead to parsing errors.
# In particular only #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] are considered valid.
# NOTE 1: We don't need to escape the #xD800-#xDFFF because the Unicode standard permanently reserves these code point values
# for UTF-16 encoding of the high and low surrogates and they will never be assigned a character.
# NOTE 2: We don't need to escape the characters bigger than #xFFFF because they are encoded by UTF-16 using surrogate pairs.
#
# Escaping table
# Source: https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
#         https://owasp.github.io/owasp-java-encoder/encoder/apidocs/org/owasp/encoder/Encode.html#forHtml(java.lang.String)
#
# ef_xml_escape_full()
# Input     Output
# &         &amp;
# <         &lt;
# >         &gt;
# "         &#x22;
# '         &#x27;
# The encoding of the greater-than sign (>) is not strictly required but it's useful to avoid ']]>' character sequence.

# ef_xml_escape_attribute()
# Input     Output
# &         &amp;
# <         &lt;
# "         &#x22;
# '         &#x27;
# The encoding of the greater-than sign (>) is not required for attributes.
#
# ef_xml_escape_content()
# Input     Output
# &         &amp;
# <         &lt;
# >         &gt;
# The encoding of the greater-than sign (>) is not strictly required but it's useful to avoid ']]>' character sequence.
# --------------------------------------------------------------------------- #

# Sanitize and escape a text stream.
#
# Reads stdin, deletes the control characters #x0-#x8, #xB, #xC and #xE-#x1F
# (none of them is a valid XML character), then runs the given sed script on
# what is left and writes the result to stdout.
#
# Arguments:
#   $1  sed script performing the entity substitutions
__ef_xml_escape_function() {
    local _sed_cmd="$1"

    # TAB (\011), LF (\012) and CR (\015) are deliberately kept. NUL (\000) is
    # part of the deleted set: it is not a valid XML character either.
    tr -d '\000-\010\013\014\016-\037' | sed -e "${_sed_cmd}"

    # how to test -> printf "abc\nDde\tf" | sed -e "${_sed_cmd}" | od -bc
    # how to test -> printf "a\030a" | tr -d '\000-\010\013\014\016-\037' | od -c
}

# Parse the command line shared by the ef_xml_escape_* functions and run the
# escaping on the selected input(s).
#
# Arguments:
#   $1                     sed script handed to __ef_xml_escape_function
#   -i ARG | --input ARG   text to escape
#   -i=ARG | --input=ARG
#   -                      escape what is read from stdin
#   --                     stop command line parsing
#
# Parsing stops at the first unrecognized argument. When both sources are
# requested, stdin is escaped first and the -i text second. An empty -i text
# produces no output. Always returns 0.
__ef_xml_escape_parameter() {
    local _sed="$1"
    shift
    local _input=''
    local _pipe=false

    while [ "$#" -gt 0 ]; do
        case "$1" in
            -i|--input)
                # The value is the next word. A trailing flag without a value
                # means empty input. The extra shift is skipped in that case:
                # shifting more positions than there are arguments is an error
                # in POSIX sh (dash aborts the script), and ${2-} keeps the
                # lookup safe when the caller runs with 'set -u'.
                _input="${2-}"
                if [ "$#" -ge 2 ]; then
                    shift
                fi
                ;;
            -i=*|--input=*)
                # Everything after the first '=' is the value.
                _input="${1#*=}"
                ;;
            -)
                _pipe=true
                ;;
            --)
                shift
                break
                ;;
            *)
                break
                ;;
        esac
        shift
    done

    if [ "${_pipe}" = true ]; then
        __ef_xml_escape_function "${_sed}"
    fi

    if [ -n "${_input}" ]; then
        # printf instead of echo: no option parsing, no backslash processing,
        # no trailing newline added to the text.
        printf "%s" "${_input}" | __ef_xml_escape_function "${_sed}"
    fi

    return 0
}

# Escape & < > " and ' as &amp; &lt; &gt; &#x22; and &#x27;.
# All arguments are forwarded unchanged to __ef_xml_escape_parameter.
ef_xml_escape_full() {
    # One sed script, rules separated by ';'. The ampersand rule comes first so
    # that the '&' introduced by the following rules is not escaped again.
    local _rules='s/\&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g;s/"/\&#x22;/g'
    _rules="${_rules};s/'/\\&#x27;/g"

    __ef_xml_escape_parameter "${_rules}" "$@"
}

# Escape & < " and ' as &amp; &lt; &#x22; and &#x27; ('>' is left untouched).
# All arguments are forwarded unchanged to __ef_xml_escape_parameter.
ef_xml_escape_attribute() {
    local _rules

    # Single sed script; '\'' splices a literal single quote into the string.
    _rules='s/\&/\&amp;/g;s/</\&lt;/g;s/"/\&#x22;/g;s/'\''/\&#x27;/g'

    __ef_xml_escape_parameter "${_rules}" "$@"
}

# Escape & < and > as &amp; &lt; and &gt; (quotes are left untouched).
# All arguments are forwarded unchanged to __ef_xml_escape_parameter.
ef_xml_escape_content() {
    # The ampersand rule runs first so the entities produced by the other two
    # rules are not escaped a second time.
    local _rules='s/\&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g'

    __ef_xml_escape_parameter "${_rules}" "$@"
}

# --------------------------------------------------------------------------- #
# DEPRECATED!
# EF_xml_escape
# =============
# Perform XML escaping from (in order) pipe and command line.
#
# Arguments:
# -i ARG | --input ARG   Input to escape
# -i=ARG | --input=ARG
# -a     | --quote-attr  Also escape double quotes (as &quot;)
# -p     | --pipe        Read input from pipe
#          --            Stop command line parsing
# --------------------------------------------------------------------------- #

# DEPRECATED!
# Escape & < > (and, with -a, the double quote) as &amp; &lt; &gt; (&quot;).
#
# Arguments:
#   -i ARG | --input ARG   text to escape
#   -i=ARG | --input=ARG
#   -a     | --quote-attr  also escape double quotes
#   -p     | --pipe        escape what is read from stdin
#            --            stop command line parsing
#
# Parsing stops at the first unrecognized argument. When both sources are
# requested, stdin is escaped first and the -i text second. The escaped -i text
# is printed followed by exactly one newline; an empty -i text prints nothing.
# Always returns 0.
ef_xml_escape() {
    local  _input=''
    local  _quote=false
    local  _pipe=false
    local  _output=''
    local  _sed_cmd=''

    while [ "$#" -gt 0 ]; do
        case "$1" in
            -i|--input)
                # The value is the next word. A trailing flag without a value
                # means empty input. The extra shift is skipped in that case:
                # shifting more positions than there are arguments is an error
                # in POSIX sh (dash aborts the script), and ${2-} keeps the
                # lookup safe when the caller runs with 'set -u'.
                _input="${2-}"
                if [ "$#" -ge 2 ]; then
                    shift
                fi
                ;;
            -i=*|--input=*)
                # Everything after the first '=' is the value.
                _input="${1#*=}"
                ;;
            -p|--pipe)
                _pipe=true
                ;;
            -a|--quote-attr)
                _quote=true
                ;;
            --)
                shift
                break
                ;;
            *)
                break
                ;;
        esac
        shift
    done

    # One sed script shared by both input sources. The ampersand rule comes
    # first so the entities produced by the other rules are not escaped again.
    _sed_cmd='s/&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g'
    if [ "${_quote}" = true ]; then
        _sed_cmd="${_sed_cmd};s/\"/\\&quot;/g"
    fi

    if [ "${_pipe}" = true ]; then
        sed -e "${_sed_cmd}"
    fi

    if [ -n "${_input}" ]; then
        # printf instead of echo: echo would swallow values such as '-n' and,
        # in some shells, interpret backslash sequences found in the text.
        # The command substitution strips trailing newlines so that the output
        # always ends with exactly one.
        _output=$(printf '%s\n' "${_input}" | sed -e "${_sed_cmd}")
        printf '%s\n' "${_output}"
    fi

    return 0
}


#
# vi: ts=4 sw=4 et syntax=sh :
#
