#!/bin/bash
# Author: José Antonio Manso García
# License: CC BY-NC 4.0
# This script is licensed under the Creative Commons Attribution-NonCommercial 4.0 International License.
# It may not be used for commercial purposes without explicit permission from the author.
# More info: https://creativecommons.org/licenses/by-nc/4.0/

# Ask how many top-ranked compounds to report. The value is later handed to
# `head -n` and embedded in the output file names, so only a positive integer
# is accepted; anything else triggers a new prompt.
while true; do
    if ! read -r -p "Enter the number of top compounds to display: " top_n; then
        # EOF / closed stdin: bail out instead of prompting forever.
        echo "Error: No input received for the number of top compounds." >&2
        exit 1
    fi
    if [[ "$top_n" =~ ^[0-9]+$ ]] && (( 10#$top_n > 0 )); then
        # Normalise leading zeros (e.g. "010" -> "10").
        top_n=$((10#$top_n))
        break
    fi
    echo "Invalid number. Please enter a positive integer." >&2
done

# Timestamp used to make the output file names unique (minute resolution)
timestamp=$(date +"%Y_%m_%d_%H%M")

# Automatically detect the SDF folder: prefer fda_sdf_compounds, otherwise the
# first (alphabetically) library_sdf_* directory in the working directory.
WORKDIR="$(pwd)"
sdf_dir=""

if [[ -d "$WORKDIR/fda_sdf_compounds" ]]; then
    sdf_dir="$WORKDIR/fda_sdf_compounds"
else
    # Shell glob instead of two `find` runs; an unmatched glob stays literal
    # and is rejected by the -d test.
    for sdf_candidate in "$WORKDIR"/library_sdf_*; do
        if [[ -d "$sdf_candidate" ]]; then
            sdf_dir="$sdf_candidate"
            break
        fi
    done
fi

if [[ -z "$sdf_dir" ]]; then
    echo "Error: No valid SDF folder found." >&2
    exit 1
fi

# Function to extract ZINC ID from SDF
get_zinc_id_from_sdf() {
    # Print the first whitespace-delimited token of the first line in the
    # given file that starts with "ZINC" followed by digits.
    # Arguments: $1 - path to an SDF file
    # Outputs:   the token (an empty line when no line matches), or the text
    #            "ZINC ID not found" when $1 is not a regular file
    local sdf_file="$1"
    # Keep the scratch variable local so callers' $zinc_id is not clobbered.
    local zinc_id=""

    if [[ ! -f "$sdf_file" ]]; then
        echo "ZINC ID not found"
        return
    fi

    zinc_id=$(awk '/^ZINC[0-9]+/ { print $1; exit }' "$sdf_file")
    echo "$zinc_id"
}

while true; do
    # Directory whose *.log files the menu options scan.
    log_dir="${WORKDIR}/docking_results"

    # Print the menu in one call, then read the user's selection into $choice.
    printf '%s\n' \
        "Select an option:" \
        "1) Top compounds based on affinity of first mode" \
        "2) Top compounds based on affinity + SimScore + TotalModes + MW (only if using a FDA library)" \
        "3) Exit"
    read -p "Enter your choice: " choice

    case $choice in
        
        1)
    # Option 1: rank ligands by the affinity of their first binding mode.
    printf '%s\n' "Executing option 1. Please wait — this may take a few minutes depending on the number of results..."

    # Output files: the top-N extract and the full, unsorted table.
    result_file="${WORKDIR}/top_${top_n}_hits_affinity_${timestamp}.txt"
    method_results_file="${WORKDIR}/results_affinity_${timestamp}.txt"

    # Column widths (also used when the data rows are written)
    width_filename=30
    width_link=65
    width_affinity=10

    # Start the full table with its header row (truncates any previous file).
    printf '%-*s%-*s%-*s\n' \
      "$width_affinity" "Affinity" \
      "$width_link" "ZINC_Link" \
      "$width_filename" "Filename" > "$method_results_file"

    # Redefine get_zinc_id_from_sdf (same lookup as in Option 2); this overrides the top-level definition
    get_zinc_id_from_sdf() {
        # Print the ZINC identifier stored in an SDF file, or an empty line.
        # Lookup order:
        #   1. the line following a "<ZINC_ID>" tag (case-insensitive; when the
        #      tag occurs more than once, the last tagged value wins);
        #   2. the first line of the file, if it is exactly ZINC<digits>.
        # Carriage returns and all whitespace are stripped from the value.
        # Arguments: $1 - path to the SDF file (missing file -> empty output)
        local sdf_file="$1"
        local zinc_id=""
        # Local so the helper does not leak a global $first_line.
        local first_line=""

        if [[ -f "$sdf_file" ]]; then
            # Take the line AFTER the tag. A tag sitting on the very last line
            # has no value line, so nothing is reported for it (instead of
            # returning the tag text itself as the identifier).
            zinc_id=$(awk '
                want { val = $0; want = 0 }
                tolower($0) ~ /<zinc_id>/ { want = 1 }
                END { print val }
            ' "$sdf_file" | tr -d '\r[:space:]')

            if [[ -z "$zinc_id" ]]; then
                first_line=$(head -n 1 "$sdf_file" | tr -d '\r[:space:]')
                if [[ "$first_line" =~ ^ZINC[0-9]+$ ]]; then
                    zinc_id="$first_line"
                fi
            fi
        fi
        echo "$zinc_id"
    }

    # Scan every docking log, take the affinity of mode 1 and append one row
    # (affinity, ZINC link, log base name) to the full results table.
    for log_file in "$log_dir"/*.log; do
        # An unmatched glob stays literal; skip it rather than handing a
        # non-existent path to awk (same guard as Option 2).
        [[ -f "$log_file" ]] || continue

        affinity=$(awk '$1 == 1 { print $2; exit }' "$log_file")
        # Logs without a first-mode affinity produce no row at all.
        [[ -n "$affinity" ]] || continue

        filename=$(basename "$log_file" .log)
        # Literal removal of the first "_docking.pdbqt" occurrence.
        base_name=${filename/_docking.pdbqt/}

        # Look for the matching SDF in the known library folders; the link
        # stays "N/A" when no SDF exists or it carries no ZINC identifier.
        zinc_link="N/A"
        for candidate_dir in "$WORKDIR/fda_sdf_compounds" "$WORKDIR"/library_sdf_*; do
            test_file="$candidate_dir/${base_name}.sdf"
            if [[ -f "$test_file" ]]; then
                zinc_id=$(get_zinc_id_from_sdf "$test_file")
                if [[ -n "$zinc_id" ]]; then
                    zinc_link="https://zinc.docking.org/substances/$zinc_id/"
                fi
                break
            fi
        done

        printf "%-*s%-*s%-*s\n" \
          "$width_affinity" "$affinity" \
          "$width_link" "$zinc_link" \
          "$width_filename" "$filename" >> "$method_results_file"
    done

    # Keep the header, then the top_n rows in ascending numeric order.
    (head -n 1 "$method_results_file" && tail -n +2 "$method_results_file" | LC_ALL=C sort -g | head -n "$top_n") > "$result_file"

    echo "Results saved to $result_file"
    cat "$result_file"
    exit 0
    ;;

        2)
# Option 2: rank by affinity and also report SimScore, TotalModes and (FDA only) MW.
printf '%s\n' "Executing option 2. Please wait — this may take several minutes or even hours, depending on the number of results..."

# Output files: the top-N extract and the full, unsorted table.
result_file="${WORKDIR}/top_${top_n}_hits_affinity_and_simscore_${timestamp}.txt"
method_results_file="${WORKDIR}/results_affinity_and_simscore_${timestamp}.txt"

# The FDA library counts as "in use" when fda_sdf_compounds holds at least one .sdf file.
# The flag is stored as the command name true/false so it can be run directly in `if`.
if compgen -G "$WORKDIR/fda_sdf_compounds/*.sdf" > /dev/null; then
    uses_fda_library=true
else
    uses_fda_library=false
fi

# Write the header row (the MW column only exists for the FDA library).
{
    if $uses_fda_library; then
        printf "%-10s %-12s %-12s %-6s %-65s %-30s\n" "Affinity" "SimScore" "TotalModes" "MW" "ZINC_Link" "Filename"
    else
        printf "%-10s %-12s %-12s %-65s %-30s\n" "Affinity" "SimScore" "TotalModes" "ZINC_Link" "Filename"
    fi
} > "$method_results_file"
# Function to extract molecular weight using Open Babel
get_mw_from_sdf() {
    # Print the molecular weight (truncated to an integer) of the first
    # molecule in an SDF file, computed by Open Babel.
    # Arguments: $1 - path to the SDF file
    # Outputs:   integer MW, or nothing when obabel produces no output
    #
    # `--append MW` adds the value to the end of the molecule title, so the
    # SMILES line reads "SMILES<TAB>[title ]MW". Reading the LAST field works
    # whether or not the molecule has a title; field 2 would be the title
    # itself whenever one is present.
    # NOTE(review): relies on obabel appending after the title - confirm
    # against the installed Open Babel version.
    local sdf_file="$1"
    # obabel diagnostics on stderr are intentionally discarded; only the first
    # record is used so multi-molecule files cannot yield a multi-line value.
    obabel "$sdf_file" -osmi --append MW 2>/dev/null | awk 'NF { print int($NF); exit }'
}

get_zinc_id_from_sdf() {
    # Print the ZINC identifier stored in an SDF file, or an empty line.
    # Lookup order:
    #   1. the line following a "<ZINC_ID>" tag (case-insensitive; when the
    #      tag occurs more than once, the last tagged value wins);
    #   2. the first line of the file, if it is exactly ZINC<digits>.
    # Carriage returns and all whitespace are stripped from the value.
    # Arguments: $1 - path to the SDF file (missing file -> empty output)
    local sdf_file="$1"
    local zinc_id=""
    # Local so the helper does not leak a global $first_line.
    local first_line=""

    if [[ -f "$sdf_file" ]]; then
        # Take the line AFTER the tag. A tag sitting on the very last line has
        # no value line, so nothing is reported for it (instead of returning
        # the tag text itself as the identifier).
        zinc_id=$(awk '
            want { val = $0; want = 0 }
            tolower($0) ~ /<zinc_id>/ { want = 1 }
            END { print val }
        ' "$sdf_file" | tr -d '\r[:space:]')

        if [[ -z "$zinc_id" ]]; then
            first_line=$(head -n 1 "$sdf_file" | tr -d '\r[:space:]')
            if [[ "$first_line" =~ ^ZINC[0-9]+$ ]]; then
                zinc_id="$first_line"
            fi
        fi
    fi
    echo "$zinc_id"
}

# Result-table row of a docking log: mode index followed by three numeric
# columns. Capture groups 2 and 3 are the "rmsd l.b." and "rmsd u.b." values
# (whitespace-separated fields 3 and 4 of the row).
mode_row_re='^[[:space:]]*[0-9]+[[:space:]]+([-0-9.]+)[[:space:]]+([-0-9.]+)[[:space:]]+([-0-9.]+)$'

# For every docking log with a first-mode affinity, a matching SDF and at
# least one mode row, append one row to the full results table.
for log_file in "$log_dir"/*.log; do
    # An unmatched glob stays literal; skip anything that is not a file.
    [[ -f "$log_file" ]] || continue

    affinity=$(awk '$1 == 1 { print $2; exit }' "$log_file")
    # A row is only written when an affinity exists, so stop early otherwise.
    [[ -n "$affinity" ]] || continue

    filename=$(basename "$log_file" .log)
    # Literal removal of the first "_docking.pdbqt" occurrence.
    base_name=${filename/_docking.pdbqt/}

    # Locate the ligand's SDF in the known library folders.
    sdf_file=""
    for candidate_dir in "$WORKDIR/fda_sdf_compounds" "$WORKDIR"/library_sdf_*; do
        test_file="$candidate_dir/${base_name}.sdf"
        if [[ -f "$test_file" ]]; then
            sdf_file="$test_file"
            break
        fi
    done
    # No SDF means no row; skip the per-mode parsing below.
    [[ -n "$sdf_file" ]] || continue

    count_l_b=0
    count_u_b=0
    total_modes=0

    # Count the modes and how many fall inside the RMSD windows
    # (lower bound in [0, 1.6), upper bound in [0, 3.2)).
    while read -r line; do
        if [[ "$line" =~ $mode_row_re ]]; then
            ((total_modes++))
            l_b_rmsd=${BASH_REMATCH[2]}
            u_b_rmsd=${BASH_REMATCH[3]}
            (( $(echo "$l_b_rmsd >= 0 && $l_b_rmsd < 1.6" | bc -l) )) && ((count_l_b++))
            (( $(echo "$u_b_rmsd >= 0 && $u_b_rmsd < 3.2" | bc -l) )) && ((count_u_b++))
        fi
    done < "$log_file"

    (( total_modes > 0 )) || continue

    # SimScore: mean of the two in-window percentages; each count is reduced
    # by one before dividing by the total number of modes.
    pct_l_b=$(echo "scale=2; 100 * ($count_l_b - 1) / $total_modes" | bc -l)
    pct_u_b=$(echo "scale=2; 100 * ($count_u_b - 1) / $total_modes" | bc -l)
    # bc always prints "." as decimal separator; force the C locale so printf
    # can parse it in comma-decimal locales too.
    pct_sum=$(LC_ALL=C printf "%.0f" "$(echo "($pct_l_b + $pct_u_b) / 2" | bc -l)")

    zinc_id=$(get_zinc_id_from_sdf "$sdf_file")
    if [[ -n "$zinc_id" ]]; then
        zinc_link="https://zinc.docking.org/substances/$zinc_id/"
    else
        zinc_link="N/A"
    fi

    if $uses_fda_library; then
        mw=$(get_mw_from_sdf "$sdf_file")
        printf "%-10s %-12s %-12s %-6s %-65s %-30s\n" \
            "$affinity" "$pct_sum" "$total_modes" "${mw:-N/A}" "$zinc_link" "$filename" >> "$method_results_file"
    else
        printf "%-10s %-12s %-12s %-65s %-30s\n" \
            "$affinity" "$pct_sum" "$total_modes" "$zinc_link" "$filename" >> "$method_results_file"
    fi
done

# Sort and select top N
# The selection is the same with or without the FDA library: keep the header,
# then the top_n data rows in ascending numeric order.
(head -n 1 "$method_results_file" && tail -n +2 "$method_results_file" | LC_ALL=C sort -g | head -n "$top_n") > "$result_file"

# Final format for terminal view
# Re-render the selected rows as fixed-width columns without separators, then
# replace the result file with the re-rendered table and show it.
# The FDA layout has one extra column (MW).
if $uses_fda_library; then
    column_titles=(Affinity SimScore TotalModes MW ZINC_Link Filename)
    column_widths=(10 12 12 6 65 30)
else
    column_titles=(Affinity SimScore TotalModes ZINC_Link Filename)
    column_widths=(10 12 12 65 30)
fi
# Index of the last column (Filename); it absorbs all remaining words.
text_col=$(( ${#column_widths[@]} - 1 ))

# Print one table row: each argument is left-padded to its column width;
# missing or empty cells are shown as "N/A".
_print_table_row() {
    local width cell
    for width in "${column_widths[@]}"; do
        cell=${1:-N/A}
        (( $# > 0 )) && shift
        printf '%-*s' "$width" "$cell"
    done
    printf '\n'
}

tmp_file=$(mktemp) || { echo "Error: could not create a temporary file." >&2; exit 1; }

{
    _print_table_row "${column_titles[@]}"
    tail -n +2 "$result_file" | {
        # Pipeline subshell: pinning IFS here does not affect the main script.
        IFS=$' \t\n'
        while read -r -a cells; do
            # Words beyond the fixed columns belong to the file name and are
            # re-joined with single spaces. Values are only ever passed as
            # printf arguments, never as a format string, so a "%" in a file
            # name is printed as-is.
            _print_table_row "${cells[@]:0:text_col}" "${cells[*]:text_col}"
        done
    }
} > "$tmp_file"

mv "$tmp_file" "$result_file"

echo -e "\nResults saved to:"
echo "$result_file"
cat "$result_file"

exit 0
;;
        3)
            echo "Exiting..."
            exit 0
            ;;
        *)
            echo "Invalid choice. Please select a valid option."
            ;;
    esac
done
