
#
################################################################################
# Copyright 2023-2025 by NI SP Software GmbH, All rights reserved.
# Copyright 1999-2023 by Nice, srl., All rights reserved.
#
# This software includes confidential and proprietary information
# of  NI SP Software GmbH ("Confidential Information").
# You shall not disclose such Confidential Information
# and shall use it only in accordance with the terms of
# the license agreement you entered into with NI SP Software.
################################################################################
#################################################################################


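# Variables:
# - GRIDML_COMPRESSION   string searched for element names whose output is suppressed
# - COMPRESS_ARRAY       "true" to collapse array jobs into compact entries
# - CLUSTER_ID           value of the cluster attribute of <grid:job-list>
# - statusmap            path of the status mapping file read in BEGIN
# Also read by the rules below:
# - GRID_TAG_SIZE_LIMIT  optional maximum length applied by truncate()
# - EF_XMLNS_grid        text inserted verbatim into the <grid:job-list> opening tag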


# Initialise the parser state and load the status mapping table.
#
# Each line of the file named by `statusmap` is split on ":"; the second part
# is a blank-separated list of grid status labels, each of which is mapped to
# the first part:  states[<label>] = <first part>.
BEGIN {
    first = 1               # cleared once the first job header has been seen
    name_found = 0          # set when a "Job Name" line was seen for the job
    array_found = 0         # 1 while the current job is a compressed array job
    FS = "\t"
    current_jobid = 0       # id of the array job currently being compressed
    job_list_emitted = 0    # set once the <grid:job-list> opening tag is out
    deferred = ""           # last "Specified Start Time" value seen

    # Read status mapping file.
    # The getline is parenthesised on purpose: POSIX leaves the parse of
    # `getline < file > 0` unspecified (binary operator to the right of "<").
    # A missing or unreadable file yields -1 and simply ends the loop.
    while ((getline < statusmap) > 0) {
        split($0, fields, ":")
        s = split(fields[2], label, " ")
        for (i = 1; i <= s; i++) {
            states[label[i]] = fields[1]
        }
    }
    close(statusmap)
}


# Close whatever is still open and set the exit status:
# 0 when a job list was opened, 1 otherwise.
END {
    if (!(first || array_found != 0))
        print "  </grid:job>"
    if (!job_list_emitted)
        exit 1
    print "</grid:job-list>"
    exit 0
}


# Shorten _s when GRID_TAG_SIZE_LIMIT is set and _s is longer than it.
# The text is cut to (limit - 5) characters, the trailing blank-delimited
# fragment (if any) is dropped, and "[...]" is appended.
# Returns _s unchanged when no limit is set or _s already fits.
function truncate(_s) {
    if (GRID_TAG_SIZE_LIMIT == "" || length(_s) <= GRID_TAG_SIZE_LIMIT)
        return _s
    _s = substr(_s, 1, GRID_TAG_SIZE_LIMIT - 5)
    sub(/ [^ ]*$/, "", _s)
    return _s "[...]"
}


# "Year" line: keep the reference year and month/day that the Event rule
# compares event dates against.
/^Year/ {
    month_day = $3
    year = $2
}


# "Job Group" line: emit the group unless compressed away or inside a
# compressed array job; the line is consumed either way.
/^Job *Group/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "group") == 0)
        print "    <grid:group>" escapeXmlContent($2) "</grid:group>"
    next
}


# "Job Description" line: emit the (possibly truncated) description unless
# compressed away or inside a compressed array job; always consume the line.
/^Job *Description/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "description") == 0) {
        job_description = truncate($2)
        print "    <grid:description>" escapeXmlContent(job_description) "</grid:description>"
    }
    next
}


# "Job Name" line: record that an explicit name exists (so the Command rule
# does not synthesise one) and emit it unless inside a compressed array job.
/^Job Name/ {
    name_found = 1
    if (!array_found)
        print "    <grid:name>" escapeXmlContent($2) "</grid:name>"
    next
}


# NOTE(review): never reached - the earlier /^Job *Description/ rule matches
# the same lines and ends with `next`.
/^Job Description/ { next }


# "Job Priority" line: emit the priority unless compressed away or inside a
# compressed array job; always consume the line.
/^Job *Priority/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "priority") == 0)
        print "    <grid:priority>" $2 "</grid:priority>"
    next
}

# Ignored line: nothing is emitted for it.
/^Job will start no sooner than indicated time/ {
    next
}

# Multicluster jobs: remember the job id used on the remote cluster.
# The id is everything that follows the first "Job <" on the line and is only
# accepted when that remainder consists of digits alone.
# NOTE(review): this relies on how the input was pre-processed (e.g. no
# closing ">" after the id) - confirm against the producer of this stream.
/^Job[\t]* *.* forwarded to cluster .* as Job / {
    remote_marker = "Job <"
    job_start = index($0, remote_marker)
    if (job_start > 0) {
        job_id = substr($0, job_start + length(remote_marker))
        if (job_id ~ /^[0-9]+$/)
            remote_job_id = job_id
    }
    next
}
# Counterpart message seen for multicluster jobs: ignored.
/^Job[\t]* *.* of cluster .* accepted as Job / {
    next
}

# Job warning settings ("Job warning action", "Job action warning time"):
# ignored.
/^Job (warning action|action warning time)/ {
    next
}

# Job header line: starts a new job.
# - closes the previous <grid:job> (unless it was a compressed array job),
#   or opens the <grid:job-list> when this is the very first job;
# - opens the new <grid:job>, except for array elements when COMPRESS_ARRAY
#   is "true": those are only recorded and emitted later by the
#   "Started on" / "Allocated" rules;
# - resets the per-job flags.
/^Job( Id)?[ \t]+[0-9]+/ {
    if (first) {
        printf("<grid:job-list type=\"lsf\" cluster=\"%s\" %s>\n", escapeXmlAttribute(CLUSTER_ID), EF_XMLNS_grid)
        job_list_emitted = 1
        first = 0
    } else if (array_found == 0) {
        print "  </grid:job>"
    }

    # An id such as 123[4] splits into more than one piece on non-digits.
    id_pieces = split($NF, val, "[^0-9]")
    if (id_pieces <= 1) {
        array_found = 0
        print "  <grid:job type=\"lsf\" id=\"" $NF "\">"
    } else if (COMPRESS_ARRAY == "true") {
        array_found = 1
        current_jobid = val[1]
    } else {
        print "  <grid:job type=\"lsf\" id=\"" val[1] "[" val[2] "]\" array-id=\"" val[1] "\" index=\"" val[2] "\" >"
    }

    remote_job_id = ""
    name_found = 0
}


# "Event" line: build the `timestamp` attribute string used by the
# submission/execution/termination time elements.
#
# After collapsing runs of blanks, $2 is split on blanks and colons; pieces
# 2..6 are used as month, day, hour, minute and second. The event carries no
# year, so it is derived from the "Year" line: when the event's month/day is
# later than the reference month/day, the previous year is used.
# Skipped inside a compressed array job.
/^Event/ {
    if (array_found == 0) {
        gsub(" +", " ", $2)
        split($2, event, "[ :]")

        # %02d (not %02s): the "0" flag is undefined for %s in POSIX printf,
        # so some awks pad the day with blanks, which breaks the string
        # comparison against month_day below.
        job_month_day = sprintf("%s%02d", event[2], event[3])

        # Translate months to numbers
        split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", month_abbr, " ")
        for (mon = 1; mon <= 12; mon++)
            sub(month_abbr[mon], sprintf("%02d", mon), job_month_day)

        # NOTE(review): assumes month_day from the "Year" line uses the same
        # MMDD form - confirm against the producer of this stream.
        if (job_month_day > month_day)
            tmp_year = year-1
        else
            tmp_year = year

        timestamp = sprintf("month=\"%s\" day=\"%s\" hour=\"%s\" minute=\"%s\" second=\"%s\" year=\"%s\"" \
            , event[2] \
            , event[3] \
            , event[4] \
            , event[5] \
            , event[6] \
            , tmp_year \
            )
    }
}


# "User Priority" and "User Group" lines are ignored. This must stay ahead of
# the generic /^User/ rule, which would otherwise treat them as the owner.
/^User (Priority|Group)/ {
    next
}


# "User" line: emit the job owner. A value containing a backslash is reduced
# to the lower-cased part after the first backslash.
/^User/ {
    if (array_found == 0) {
        user = (split($2, val, "\\") > 1) ? tolower(val[2]) : $2
        print "    <grid:owner>" escapeXmlContent(user) "</grid:owner>"
    }
}


# "Project" line: emitted as the job account (not for compressed array jobs).
/^Project/ {
    if (!array_found)
        print "    <grid:account>" escapeXmlContent($2) "</grid:account>"
}


# "Queue" line: emit the queue name (not for compressed array jobs).
/^Queue/ {
    if (!array_found)
        print "    <grid:queue>" escapeXmlContent($2) "</grid:queue>"
}


# "Status" line: emit the status as reported by the grid together with its
# mapping from the `states` table ("Unknown" when the table has no entry).
# Skipped inside a compressed array job.
/^Status/ {
    if (array_found == 0) {
        myStatus = states[$2]
        if (myStatus == "")
            myStatus = "Unknown"
        # Escape the values like the other input-derived fields so that an
        # unexpected character cannot break the XML.
        print "    <grid:status grid=\"" escapeXmlAttribute($2) "\" ef=\"" escapeXmlAttribute(myStatus) "\">" escapeXmlContent($2) "</grid:status>"
    }
}


# "Command" line: emit the (possibly truncated) command. When no "Job Name"
# line was seen for this job, a name is derived from the command: "[script]"
# if it looks like a script streamed through stdin, else the command itself.
/^[ \t]*Command/ {
    if (array_found == 0) {
        command = truncate($2)
        print "    <grid:command>" escapeXmlContent(command) "</grid:command>"
        if (!name_found) {
            # If it's a script text (streamed into the stdin)
            jobname = (command ~ /^# *!/) ? "[script]" : command
            print "    <grid:name>" escapeXmlContent(jobname) "</grid:name>"
        }
        name_found = 0
    }
}


# "Requested Resources" line: emit the requirements unless compressed away or
# inside a compressed array job.
/^[ \t]*Requested Resources/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "requirements") == 0)
        print "    <grid:requirements>" escapeXmlContent($2) "</grid:requirements>"
}


# "<n> Processors Requested" / "<n> Task(s)" line: emit the requested slot
# range. The word preceding "Processors" or "Task(s)" is taken as "<min>" or
# "<min>-<max>". When the same line also mentions "Requested Resources",
# the requirements element is emitted from $2 as well.
/^[ \t]*[0-9][0-9]* Processors Requested|^[ \t]*[0-9][0-9]* Task\(s\)/ {
    if (array_found == 0) {
        wcount = split($0, parallel, " ")
        nppos = 1
        for (wi in parallel)
            if (parallel[wi] == "Task(s)" || parallel[wi] == "Processors")
                nppos = wi - 1

        sub("Processors", "", parallel[nppos])
        sub("Task\\(s\\)", "", parallel[nppos])
        range_parts = split(parallel[nppos], procs, "-")
        if (index(GRIDML_COMPRESSION, "parallel") == 0)
            print "    <grid:parallel min=\"" procs[1] "\" max=\"" procs[range_parts] "\"/>"

        if (match($0, /[ \t]Requested Resources/) && index(GRIDML_COMPRESSION, "requirements") == 0)
            print "    <grid:requirements>" escapeXmlContent($2) "</grid:requirements>"
    }
}


# "Submitted from host" line: emit the submission time (from the latest
# Event timestamp) and the submission host, each unless compressed away.
/^Submitted from host/ {
    if (array_found == 0) {
        if (index(GRIDML_COMPRESSION, "submission-time") == 0)
            print "    <grid:submission-time " timestamp "/>"
        if (index(GRIDML_COMPRESSION, "submission-host") == 0)
            print "    <grid:submission-host>" escapeXmlContent($2) "</grid:submission-host>"
    }
}


# "Share group charged" lines are ignored.
/^Share group charged/ { next }


# "CWD" line: emit the submission directory (not for compressed array jobs).
/^CWD/ {
    if (!array_found)
        print "    <grid:submission-directory>" escapeXmlContent($2) "</grid:submission-directory>"
}


# "Started on" line: emit the execution time and the execution hosts.
# For a compressed array job this rule emits the whole compact <grid:job>
# element (opening tag, hosts, closing tag) by itself.
# Hosts are taken from fields 2..NF; "<cores>*<host>" adds <cores> to that
# host, a bare host name counts as one core.
/^[\[]*[0-9]*[\]]*[ ]*[Ss]tarted on/ {
    if (array_found == 1) {
        if (split(current_jobid, val, "[^0-9]") > 1)
            print "  <grid:job type=\"lsf\" id=\"" val[1] "\" index=\"" val[2] "\" is-array=\"true\" mode=\"array\">"
        else
            print "  <grid:job type=\"lsf\" id=\"" current_jobid "\" is-array=\"true\" mode=\"array\">"
    } else if (array_found == 0 && index(GRIDML_COMPRESSION, "execution-time") == 0) {
        print "    <grid:execution-time " timestamp "/>"
    }

    gsub("><", "\n    ", $0)

    if (index(GRIDML_COMPRESSION, "execution-host") == 0) {
        # start from an empty host table
        split("", execution_hosts)
        i = 2
        while (i <= NF) {
            if (index($i, "*") > 0) {
                # parallel submission: host value is <cores>*<hostname> (e.g. 24*host1)
                split($i, host, "*")
                execution_hosts[host[2]] += host[1]
            } else {
                execution_hosts[$i]++
            }
            i++
        }
        for (j in execution_hosts)
            print "    <grid:execution-host cores=\"" execution_hosts[j] "\">" escapeXmlContent(j) "</grid:execution-host>"
    }

    if (array_found == 1)
        print "</grid:job>"
}


# "Allocated <n> Slot(s) on Host(s)" line: same handling as "Started on",
# plus multicluster support: when exactly one host is listed and a remote
# job id was recorded, the host element carries that id (and the id is
# cleared afterwards).
/^[ \t]*Allocated [0-9]* Slot\(s\) on Host\(s\)/ {
    if (array_found == 1) {
        if (split(current_jobid, val, "[^0-9]") > 1)
            print "  <grid:job type=\"lsf\" id=\"" val[1] "\" index=\"" val[2] "\" is-array=\"true\" mode=\"array\">"
        else
            print "  <grid:job type=\"lsf\" id=\"" current_jobid "\" is-array=\"true\" mode=\"array\">"
    } else if (array_found == 0 && index(GRIDML_COMPRESSION, "execution-time") == 0) {
        print "    <grid:execution-time " timestamp "/>"
    }

    gsub("><", "\n    ", $0)

    if (index(GRIDML_COMPRESSION, "execution-host") == 0) {
        # start from an empty host table
        split("", execution_hosts)
        i = 2
        while (i <= NF) {
            if (index($i, "*") > 0) {
                # parallel submission: host value is <cores>*<hostname> (e.g. 24*host1)
                split($i, host, "*")
                execution_hosts[host[2]] += host[1]
            } else {
                execution_hosts[$i]++
            }
            i++
        }

        # Decide once, before the loop, whether the remote job id is attached.
        tag_remote = (length(execution_hosts) == 1 && remote_job_id != "")
        for (j in execution_hosts) {
            if (tag_remote) {
                print "    <grid:execution-host remote_job_id=\"" remote_job_id "\" cores=\"" execution_hosts[j] "\">" escapeXmlContent(j) ":" remote_job_id "</grid:execution-host>"
                remote_job_id = ""
            } else {
                print "    <grid:execution-host cores=\"" execution_hosts[j] "\">" escapeXmlContent(j) "</grid:execution-host>"
            }
        }
    }

    if (array_found == 1)
        print "</grid:job>"
}


# "Execution CWD" line: emit the execution directory unless compressed away
# or inside a compressed array job.
/^Execution[ ]*CWD/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "execution-directory") == 0)
        print "    <grid:execution-directory>" escapeXmlContent($2) "</grid:execution-directory>"
}


# "Specified Start Time" line: keep the value; the Reasons rule appends it
# (formatted by date(1)) to the matching pending reason.
/^Specified Start Time/ { deferred = $2 }


# "Reasons" line: emit one <grid:reason> per ";"-separated entry of $2
# (entries of at most one character are dropped). When a start time was
# recorded and the reason mentions it, the time formatted by date(1) is
# inserted between "(" and ")" on lines of its own.
# Skipped when compressed away or inside a compressed array job.
/^Reasons/ {
    if ((array_found == 0) && (!index(GRIDML_COMPRESSION, "reasons"))) {
        tot = split($2, reason, ";")
        print "    <grid:reasons>"
        for (i = 1; i <= tot; i++)
            if (length(reason[i]) > 1) {
                if ((deferred) && (index(reason[i], "Job has a specified start time"))) {
                    print "      <grid:reason>" escapeXmlContent(reason[i]) " ("
                    # `deferred` comes from the input stream: pass it to the
                    # shell inside single quotes (embedded quotes escaped) so
                    # that $(), backticks or quotes in it are never interpreted.
                    quoted_deferred = deferred
                    gsub(/'/, "'\\''", quoted_deferred)
                    # date writes straight to stdout: flush our own pending
                    # output first so the pieces appear in order.
                    fflush()
                    system("date -d '" quoted_deferred "' \"+%a, %-d %b %Y at %R %Z\"")
                    print "    )</grid:reason>"
                } else
                    print "      <grid:reason>" escapeXmlContent(reason[i]) "</grid:reason>"
            }
        print "    </grid:reasons>"
    }
}


# "Done" line: emit the termination time (latest Event timestamp), the total
# CPU usage taken from the 8th blank-separated word of $1, and exit code 0.
# NOTE(review): the word position presumably matches a text such as
# "Done successfully. The CPU time used is <n> seconds." - confirm.
# Skipped inside a compressed array job.
/^Done/ {
    if (array_found == 0) {
        split($1, summary, " ")
        if (!index(GRIDML_COMPRESSION, "termination-time"))
            print "    <grid:termination-time " timestamp "/>"
        # Like the "Exited" rule, emit nothing instead of an empty element
        # when the line carries no CPU figure.
        if (!index(GRIDML_COMPRESSION, "total-cpu-usage") && summary[8] != "")
            print "    <grid:total-cpu-usage>" escapeXmlContent(summary[8]) "</grid:total-cpu-usage>"
        print "    <grid:exit-code>0</grid:exit-code>"
    }
}


# "Exited" line: emit the termination time, the total CPU usage (11th word of
# $1, only when present) and the exit code (5th word, coerced to a number).
# Skipped inside a compressed array job.
/^Exited/ {
    if (!array_found) {
        split($1, summary, " ")
        if (index(GRIDML_COMPRESSION, "termination-time") == 0)
            print "    <grid:termination-time " timestamp "/>"
        if (index(GRIDML_COMPRESSION, "total-cpu-usage") == 0 && summary[11] != "")
            print "    <grid:total-cpu-usage>" escapeXmlContent(summary[11]) "</grid:total-cpu-usage>"
        print "    <grid:exit-code>" (summary[5] + 0) "</grid:exit-code>"
    }
}


# "Memory" line: emit the memory usage unless compressed away or inside a
# compressed array job.
/^Memory/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "memory-usage") == 0)
        print "    <grid:memory-usage>" $2 "</grid:memory-usage>"
}


# "Swap" line: emit the swap usage unless compressed away or inside a
# compressed array job.
/^Swap/ {
    if (array_found == 0 && index(GRIDML_COMPRESSION, "swap-usage") == 0)
        print "    <grid:swap-usage>" $2 "</grid:swap-usage>"
}

# Example 
# Resource usage collected. The CPU time used is 382 seconds. MEM: 9 Mbytes; SWAP: 458 Mbytes; NTHREAD: 4; PGID: 10208; PIDs: 10208 10209 10211;

# Inline "SWAP: <value>;" item of a resource-usage line: emit the text
# between "SWAP: " and the next ";" as the swap usage.
/SWAP:/ {
    swap_marker = "SWAP: "
    swap_start = index($0, swap_marker)
    if (swap_start > 0) {
        split(substr($0, swap_start + length(swap_marker)), parts, ";")
        if (index(GRIDML_COMPRESSION, "swap-usage") == 0 && array_found == 0 && parts[1] != "")
            print "    <grid:swap-usage>" parts[1] "</grid:swap-usage>"
    }
}

# Inline "MEM: <value>;" item of a resource-usage line: emit the text between
# "MEM: " and the next ";" as the memory usage.
/MEM:/ {
    mem_marker = "MEM: "
    mem_start = index($0, mem_marker)
    if (mem_start > 0) {
        split(substr($0, mem_start + length(mem_marker)), parts, ";")
        if (index(GRIDML_COMPRESSION, "memory-usage") == 0 && array_found == 0 && parts[1] != "")
            print "    <grid:memory-usage>" parts[1] "</grid:memory-usage>"
    }
}

# "The CPU time used is <n> ..." text: emit <n> followed by " sec" as the
# total CPU usage, but only when <n> is a plain run of digits.
# Skipped inside a compressed array job.
/The CPU time used is / {
    if (!array_found) {
        cpu_marker = "The CPU time used is "
        cpu_start = index($0, cpu_marker)
        if (cpu_start > 0) {
            split(substr($0, cpu_start + length(cpu_marker)), parts, " ")
            if (index(GRIDML_COMPRESSION, "total-cpu-usage") == 0 && parts[1] ~ /^[0-9]+$/)
                print "    <grid:total-cpu-usage>" parts[1] " sec</grid:total-cpu-usage>"
        }
    }
}

# Inline "PGID: <value>;" item of a resource-usage line: emit the text
# between "PGID: " and the next ";" as the process group.
# Skipped inside a compressed array job.
/PGID:/ {
    if (!array_found) {
        pgid_marker = "PGID: "
        pgid_start = index($0, pgid_marker)
        if (pgid_start > 0) {
            split(substr($0, pgid_start + length(pgid_marker)), parts, ";")
            if (index(GRIDML_COMPRESSION, "pid-group") == 0 && parts[1] != "")
                print "    <grid:pid-group>" parts[1] "</grid:pid-group>"
        }
    }
}

# "CPU<TAB>..." line: emit the 6th blank-separated word of $2, followed by
# " sec", as the total CPU usage. Skipped inside a compressed array job.
/^CPU\t/ {
    if (!array_found) {
        split($2, summary, " ")
        if (index(GRIDML_COMPRESSION, "total-cpu-usage") == 0)
            print "    <grid:total-cpu-usage>" summary[6] " sec</grid:total-cpu-usage>"
    }
}


# "PGID" line: emit the part of $2 that precedes "PIDs:" as the process
# group. Skipped inside a compressed array job.
/^[ \t]*PGID/ {
    if (!array_found) {
        split($2, summary, "PIDs:")
        if (index(GRIDML_COMPRESSION, "pid-group") == 0)
            print "    <grid:pid-group>" summary[1] "</grid:pid-group>"
    }
}

# Any line containing "PIDs: ": strip everything up to and including the last
# "PIDs: " and a trailing ";" from $0, then emit the remainder as the pid
# list. Skipped inside a compressed array job.
/.*PIDs: / {
    if (!array_found) {
        sub(/.*PIDs: /, "")
        sub(/; *$/, "")
        if (index(GRIDML_COMPRESSION, "pids") == 0)
            print "    <grid:pids>" $0 "</grid:pids>"
    }
}


#
# vi: ts=4 sw=4 et syntax=awk :
#
