#!/usr/bin/env bash
# wiki - fetch a Wikipedia page and print its article content as Markdown.
#
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Version string reported by -v / --version.
version="0.1.0"

#######################################
# Print the command-line help text.
# Every option accepted by the argument parser is listed here, including
# --help and the "--" end-of-options marker.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr when needed)
#######################################
usage() {
  cat <<'EOF'
Usage: wiki [--raw] [--list-sections|-ls] [--section|-s TITLE ...] [--search] [--featured] [--random] [--news] [--check] [-o FILE] [--] <query | wikipedia-url>

Fetch a Wikipedia page and print article content as Markdown.
Options may appear before or after the page query.

Options:
  --raw                 Print raw Markdown even in a terminal
  --list-sections, -ls  Print the page table of contents
  --section, -s TITLE   Print sections by fuzzy title match; repeatable
  --search              Show search results instead of fetching first match
  --featured            Fetch today's featured article from the main page
  --random              Fetch a random article
  --news                Show Topics in the news from the Current Events portal
  --check               Check if required dependencies are installed
  -o FILE               Write Markdown to FILE instead of stdout
  -h, --help            Show this help
  -v, --version         Show version
  --                    Treat every following argument as part of the query

Examples:
  wiki "Unix shell"
  wiki "Operating system" --raw
  wiki -ls "Unix shell"
  wiki "Operating system" --list-sections
  wiki -s Origins -s Features "Bash"
  wiki "Operating system" -s hist.
  wiki "Operating system" --section History --section Types
  wiki --search "Rust"
  wiki --featured
  wiki --random
  wiki --news
  wiki -o bash.md "Bash (Unix shell)"
  wiki "https://en.wikipedia.org/wiki/Curl_(programming_language)"
EOF
}

#######################################
# Report a fatal error and terminate the script.
# Arguments: message words; joined into a single line
# Outputs:   "wiki: <message>" on stderr
# Returns:   never; exits with status 1
#######################################
die() {
  local message="$*"
  {
    printf 'wiki: %s\n' "$message"
  } >&2
  exit 1
}

#######################################
# Abort unless the named command can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when found; otherwise exits through die
#######################################
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    die "missing dependency: $1"
  fi
}

#######################################
# Report which external tools are available.
# Prints one "<tool>: OK" or "<tool>: missing" line per tool, required
# tools first, then the optional renderers/pagers.
# Arguments: none
# Outputs:   status lines on stdout
# Returns:   0 when every required tool is present, 1 otherwise
#            (missing optional tools never affect the status)
#######################################
check_dependencies() {
  local -a required_deps=(curl htmlq pandoc)
  local -a optional_deps=(glow mdcat bat less)
  # Loop variable is local so it does not leak into the caller's scope.
  local tool
  local missing_required=0

  for tool in "${required_deps[@]}"; do
    if command -v "$tool" >/dev/null 2>&1; then
      printf '%s: OK\n' "$tool"
    else
      printf '%s: missing\n' "$tool"
      missing_required=1
    fi
  done

  for tool in "${optional_deps[@]}"; do
    if command -v "$tool" >/dev/null 2>&1; then
      printf '%s: OK\n' "$tool"
    else
      printf '%s: missing\n' "$tool"
    fi
  done

  return "$missing_required"
}


#######################################
# Percent-encode a string for use in a URL query component.
# Unreserved characters (A-Z a-z 0-9 . ~ _ -) pass through; every other
# byte becomes %XX. Encoding is done per byte, so multi-byte UTF-8
# characters produce one %XX per byte (e.g. "é" -> %C3%A9).
# Arguments: $1 - text to encode
# Outputs:   encoded text followed by a newline on stdout
#######################################
urlencode() {
  local input="$1"
  # Force byte semantics: in a UTF-8 locale ${#input} and ${input:i:1} work on
  # characters and "'$ch" yields a code point, which is not valid
  # percent-encoding. The C locale also keeps the a-z/A-Z ranges ASCII-only.
  local LC_ALL=C
  local i ch code out=""

  for ((i = 0; i < ${#input}; i++)); do
    ch="${input:i:1}"
    case "$ch" in
      [a-zA-Z0-9.~_-])
        out+="$ch"
        ;;
      *)
        printf -v code '%d' "'$ch"
        # Mask to one byte in case the C library reports high bytes as a
        # wide or negative value.
        printf -v out '%s%%%02X' "$out" "$((code & 0xFF))"
        ;;
    esac
  done

  printf '%s\n' "$out"
}

#######################################
# Download a URL with curl, following redirects.
# Arguments: $1 - URL to request
#            $2 - file that receives the response body
# Outputs:   the effective (post-redirect) URL on stdout, no trailing newline
# Returns:   curl's exit status (-f makes HTTP errors a failure)
#######################################
fetch() {
  local -a curl_args=(
    -fsSL
    -A 'wiki/0.1 (+https://wikipedia.org)'
    -o "$2"
    -w '%{url_effective}'
    "$1"
  )

  curl "${curl_args[@]}"
}

#######################################
# Resolve the URL of today's featured article from the English main page.
# The page is held in memory, so no temporary file can be left behind when
# the function bails out through die.
# Arguments: none
# Outputs:   absolute article URL on stdout
# Returns:   0 on success; exits through die when the main page cannot be
#            fetched or no featured-article link is found
#######################################
get_featured_url() {
  local main_page_url="https://en.wikipedia.org/wiki/Main_Page"
  local html
  local href=""

  html="$(curl -fsSL -A 'wiki/0.1 (+https://wikipedia.org)' "$main_page_url")" \
    || die "could not fetch main page: $main_page_url"

  # Only the first matching link is wanted. Reading it with the builtin avoids
  # a `head` stage that could make the pipeline fail under pipefail.
  # htmlq diagnostics are deliberately silenced; an empty result is handled below.
  IFS= read -r href < <(htmlq --attribute href '#mp-tfa p b a' <<<"$html" 2>/dev/null) || true

  [[ -n "$href" ]] || die "could not extract featured article link"

  # Site-relative links are resolved against the English Wikipedia host.
  if [[ "$href" == /* ]]; then
    printf '%s\n' "https://en.wikipedia.org$href"
  else
    printf '%s\n' "$href"
  fi
}

#######################################
# Copy the first line of stdin to stdout without its newline.
# Leading/trailing whitespace and backslashes are preserved. A final line
# that lacks a newline is still emitted; empty input yields empty output.
# Arguments: none (reads stdin)
# Outputs:   the first input line on stdout, no trailing newline
#######################################
first_line() {
  local head_line=""
  if ! IFS= read -r head_line; then
    : # EOF reached before a newline; whatever was read stays in head_line
  fi
  printf '%s' "$head_line"
}

#######################################
# Render Markdown from stdin for interactive viewing.
# When stdout is not a terminal the input is passed through untouched.
# Otherwise the first available tool wins, in this order: glow, mdcat
# (through less when present), bat, less, and finally plain cat.
# Arguments: none (reads stdin)
# Outputs:   rendered or raw Markdown on stdout
#######################################
render_pretty() {
  # Redirected or piped output: no rendering, no paging.
  if ! [[ -t 1 ]]; then
    cat
    return
  fi

  if command -v glow >/dev/null 2>&1; then
    glow -s auto -
    return
  fi

  if command -v mdcat >/dev/null 2>&1; then
    # mdcat does not page by itself, so hand its output to less when possible.
    if command -v less >/dev/null 2>&1; then
      mdcat | less -RF
    else
      mdcat
    fi
    return
  fi

  if command -v bat >/dev/null 2>&1; then
    bat -l markdown --style=plain --paging=auto
    return
  fi

  # No renderer installed: page the raw text if we can, else just print it.
  if command -v less >/dev/null 2>&1; then
    less -RF
  else
    cat
  fi
}

#######################################
# Page stdin through less only when stdout is a terminal and less exists;
# in every other case copy stdin to stdout unchanged.
# Arguments: none (reads stdin)
# Outputs:   the input text on stdout
#######################################
page_if_tty() {
  if ! [[ -t 1 ]] || ! command -v less >/dev/null 2>&1; then
    cat
  else
    less -RF
  fi
}

#######################################
# Print a numbered list of search hits found in a search-results page.
# Globals:   query (read) - the search text, used in the header and errors
# Arguments: $1 - path of the saved search-results HTML
# Outputs:   header plus one "N. title / Query / URL" entry per hit on stdout
# Returns:   0 on success; exits through die when there are no hits (before
#            anything is printed) or when titles and links do not pair up
#######################################
show_search_results() {
  local html_file="$1"
  local result_selector='.mw-search-result-heading a'
  local idx title url
  local -a hrefs=() texts=()

  # Links and titles are extracted in two passes and paired by position.
  # htmlq diagnostics are deliberately silenced; empty output is handled below.
  mapfile -t hrefs < <(htmlq --attribute href "$result_selector" < "$html_file" 2>/dev/null)
  mapfile -t texts < <(htmlq --text "$result_selector" < "$html_file" 2>/dev/null)

  if (( ${#hrefs[@]} != ${#texts[@]} )); then
    die "internal error: mismatch between hrefs and texts"
  fi

  # Fail before printing the header so an empty result produces no output.
  if (( ${#hrefs[@]} == 0 )); then
    die "no search results for: $query"
  fi

  printf 'Search results for "%s":\n\n' "$query"

  for idx in "${!hrefs[@]}"; do
    title="$(trim_text "${texts[idx]}")"

    # Site-relative links are resolved against the English Wikipedia host.
    if [[ "${hrefs[idx]}" == /* ]]; then
      url="https://en.wikipedia.org${hrefs[idx]}"
    else
      url="${hrefs[idx]}"
    fi

    # The title doubles as the query that fetches this exact page.
    printf '%d. %s\n' "$((idx + 1))" "$title"
    printf '   Query: "%s"\n' "$title"
    printf '   URL: %s\n\n' "$url"
  done
}

#######################################
# Convert a Markdown snippet to plain text with pandoc.
# Arguments: $1 - Markdown (GFM) text
# Outputs:   pandoc's plain rendering on stdout with carriage returns removed;
#            pandoc's stderr is discarded
#######################################
plain_markdown_text() {
  local markdown="$1"
  # The here-string supplies the text plus a terminating newline on stdin.
  pandoc --from=gfm --to=plain --wrap=none 2>/dev/null <<<"$markdown" | tr -d '\r'
}

#######################################
# Normalise whitespace in a string: tabs become spaces, runs of spaces
# collapse to a single space, and leading/trailing whitespace is removed.
# Arguments: $1 - text to clean
# Outputs:   cleaned text followed by a newline on stdout
#######################################
trim_text() {
  local cleaned="${1//$'\t'/ }"
  local leading trailing

  # Each pass halves runs of spaces; repeat until no double space is left.
  until [[ "$cleaned" != *"  "* ]]; do
    cleaned="${cleaned//  / }"
  done

  # Everything before the first non-space character.
  leading="${cleaned%%[![:space:]]*}"
  cleaned="${cleaned#"$leading"}"

  # Everything after the last non-space character.
  trailing="${cleaned##*[![:space:]]}"
  cleaned="${cleaned%"$trailing"}"

  printf '%s\n' "$cleaned"
}

#######################################
# Reduce a heading to a comparison key: rendered as plain text, trimmed,
# lower-cased, and stripped of everything that is not alphanumeric.
# Arguments: $1 - heading text (may contain Markdown)
# Outputs:   the key followed by a newline on stdout
#######################################
normalize_heading() {
  local cleaned
  cleaned="$(trim_text "$(plain_markdown_text "$1")")"
  # Newlines survive the filter so the output stays line-terminated.
  tr '[:upper:]' '[:lower:]' <<<"$cleaned" | tr -cd '[:alnum:]\n'
}

#######################################
# Write the full article as Markdown: optional title heading, a source
# line, then the HTML fragment converted to GFM by pandoc.
# Globals:   title (read), page_url (read), tmp_fragment (read)
# Arguments: $1 - file to create or overwrite
# Outputs:   nothing on stdout; content goes to the destination file
#######################################
write_markdown() {
  local destination="$1"

  {
    # The heading is skipped when no page title is known.
    [[ -z "$title" ]] || printf '# %s\n\n' "$title"
    printf '_Source: <%s>_\n\n' "$page_url"
    pandoc --from=html --to=gfm --wrap=none "$tmp_fragment"
  } > "$destination"
}

#######################################
# Print a table of contents built from the Markdown headings of a file.
# Level-2 headings are top-level bullets; each deeper level is indented by
# two more spaces. Level-1 headings are ignored.
# Globals:   title (read), page_url (read)
# Arguments: $1 - Markdown file to scan
# Outputs:   title, source line and bullet list on stdout
# Returns:   0 on success; exits through die when no heading is found
#######################################
write_section_list() {
  local source_file="$1"
  local heading_re='^(#{2,})[[:space:]]+(.*)$'
  local row label depth pad
  local found=0

  [[ -z "$title" ]] || printf '# %s\n\n' "$title"
  printf '_Source: <%s>_\n\n' "$page_url"

  while IFS= read -r row; do
    if [[ "$row" =~ $heading_re ]]; then
      depth=${#BASH_REMATCH[1]}
      label="$(plain_markdown_text "${BASH_REMATCH[2]}")"
      # "##" maps to no indent; every extra "#" adds two spaces.
      pad=$((2 * (depth - 2)))
      printf '%*s- %s\n' "$pad" '' "$label"
      found=1
    fi
  done < "$source_file"

  if ((found == 0)); then
    die "no sections found for: $page_url"
  fi
}

#######################################
# Print only the sections whose heading matches one of the requested titles.
# Matching is a substring test on normalised headings (see
# normalize_heading), so "hist." matches "History". Each printed section is
# preceded by the chain of its ancestor headings and includes all of its
# subsections. A matched section that lies inside another matched section
# is not printed a second time, because the enclosing section already
# contains it.
# Globals:   title (read), page_url (read)
# Arguments: $1   - Markdown file to read
#            $2.. - requested section titles
# Outputs:   title, source line and the selected sections on stdout
# Returns:   0 on success; exits through die when the file has no headings,
#            a request normalises to nothing, or a request matches no heading
#######################################
write_selected_sections() {
  local source_file="$1"
  shift
  local -a requested=("$@")
  local -a lines=() headings=() starts=() levels=() normalized_headings=() parents=()
  local -a normalized_targets=() target_found=() matched_indexes=() chain=()
  local -A matched_lookup=()
  local heading_re='^(#{2,})[[:space:]]+(.*)$'
  local idx scan_idx end_idx level heading target normalized_target
  local parent_idx chain_idx target_idx line_idx hashes covered
  local printed_any=0

  mapfile -t lines < "$source_file"

  # Pass 1: index every heading (level 2 and deeper) with its line number,
  # depth, comparison key and nearest shallower predecessor (its parent).
  for idx in "${!lines[@]}"; do
    [[ "${lines[idx]}" =~ $heading_re ]] || continue
    level=${#BASH_REMATCH[1]}
    heading="$(trim_text "$(plain_markdown_text "${BASH_REMATCH[2]}")")"

    parent_idx=-1
    for ((scan_idx = ${#levels[@]} - 1; scan_idx >= 0; scan_idx--)); do
      if (( levels[scan_idx] < level )); then
        parent_idx=$scan_idx
        break
      fi
    done

    headings+=("$heading")
    starts+=("$idx")
    levels+=("$level")
    normalized_headings+=("$(normalize_heading "$heading")")
    parents+=("$parent_idx")
  done

  ((${#headings[@]} > 0)) || die "no sections found for: $page_url"

  # Pass 2: normalise the requests the same way as the headings.
  for target in "${requested[@]}"; do
    normalized_target="$(normalize_heading "$target")"
    [[ -n "$normalized_target" ]] || die "section query is empty after normalization: $target"
    normalized_targets+=("$normalized_target")
    target_found+=(0)
  done

  # Pass 3: collect matching headings in document order, each at most once.
  for idx in "${!headings[@]}"; do
    for target_idx in "${!normalized_targets[@]}"; do
      [[ "${normalized_headings[idx]}" == *"${normalized_targets[target_idx]}"* ]] || continue
      target_found[target_idx]=1
      if [[ -z "${matched_lookup[$idx]:-}" ]]; then
        matched_lookup[$idx]=1
        matched_indexes+=("$idx")
      fi
    done
  done

  for target_idx in "${!requested[@]}"; do
    (( target_found[target_idx] )) || die "section not found: ${requested[target_idx]}"
  done

  if [[ -n "$title" ]]; then
    printf '# %s\n\n' "$title"
  fi
  printf '_Source: <%s>_\n\n' "$page_url"

  # Pass 4: print each selected section.
  for idx in "${matched_indexes[@]}"; do
    # Skip sections already emitted as part of a matched ancestor.
    covered=0
    parent_idx=${parents[idx]}
    while (( parent_idx >= 0 )); do
      if [[ -n "${matched_lookup[$parent_idx]:-}" ]]; then
        covered=1
        break
      fi
      parent_idx=${parents[parent_idx]}
    done
    if (( covered )); then
      continue
    fi

    # The section runs up to the next heading of the same or a shallower level.
    level=${levels[idx]}
    end_idx=${#lines[@]}
    for ((scan_idx = idx + 1; scan_idx < ${#headings[@]}; scan_idx++)); do
      if (( levels[scan_idx] <= level )); then
        end_idx=${starts[scan_idx]}
        break
      fi
    done

    # Ancestors first, the section itself last. Seeding the array with the
    # section keeps the expansion below safe under nounset on older bash.
    chain=("$idx")
    parent_idx=${parents[idx]}
    while (( parent_idx >= 0 )); do
      chain=("$parent_idx" "${chain[@]}")
      parent_idx=${parents[parent_idx]}
    done

    # Blank line between consecutive sections, none before the first.
    if (( printed_any )); then
      printf '\n'
    fi
    printed_any=1

    for chain_idx in "${chain[@]}"; do
      # Build the run of '#' characters without forking tr.
      printf -v hashes '%*s' "${levels[chain_idx]}" ''
      printf '%s %s\n' "${hashes// /#}" "${headings[chain_idx]}"
    done
    printf '\n'

    # Body lines; nested headings are re-rendered with plain-text titles.
    for ((line_idx = starts[idx] + 1; line_idx < end_idx; line_idx++)); do
      if [[ "${lines[line_idx]}" =~ $heading_re ]]; then
        printf '%s %s\n' "${BASH_REMATCH[1]}" "$(trim_text "$(plain_markdown_text "${BASH_REMATCH[2]}")")"
      else
        printf '%s\n' "${lines[line_idx]}"
      fi
    done
  done
}


# ---------------------------------------------------------------------------
# Command-line state. Flags are 0/1 integers tested with (( ... )).
# ---------------------------------------------------------------------------
output=""             # -o FILE destination; empty means stdout
raw=0                 # --raw
list_sections=0       # --list-sections / -ls
search_mode=0         # --search
featured_mode=0       # --featured
random_mode=0         # --random
news_mode=0           # --news
check_mode=0          # --check
declare -a sections=()     # one entry per --section / -s
declare -a query_parts=()  # every non-option word, in order
parsing_options=1     # cleared by "--"; afterwards every word is query text

# Options and query words may be interleaved. Each iteration consumes the
# word(s) it handled, so the loop ends once the argument list is empty.
while (($# > 0)); do
  # After "--" nothing is interpreted as an option any more.
  if ((parsing_options == 0)); then
    query_parts+=("$1")
    shift
    continue
  fi

  case "$1" in
    --raw)
      raw=1
      shift
      ;;
    --list-sections|-ls)
      list_sections=1
      shift
      ;;
    --search)
      search_mode=1
      shift
      ;;
    --featured)
      featured_mode=1
      shift
      ;;
    --random)
      random_mode=1
      shift
      ;;
    --news)
      news_mode=1
      shift
      ;;
    --check)
      check_mode=1
      shift
      ;;
    --section|-s)
      (($# >= 2)) || die "option --section requires a value"
      sections+=("$2")
      shift 2
      ;;
    -o)
      (($# >= 2)) || die "option -o requires a value"
      output="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    -v|--version)
      printf 'wiki %s\n' "$version"
      exit 0
      ;;
    --)
      parsing_options=0
      shift
      ;;
    -*)
      die "unknown option: $1"
      ;;
    *)
      # Anything else is part of the page query.
      query_parts+=("$1")
      shift
      ;;
  esac
done

# --check only reports on the installed tools and never touches the network.
# Exit status: 0 when all required tools are present, 1 otherwise.
if ((check_mode)); then
  if check_dependencies; then
    exit 0
  else
    exit 1
  fi
fi

# A page query is mandatory unless the mode picks the page by itself.
if ((!featured_mode && !random_mode && !news_mode && ${#query_parts[@]} == 0)); then
  usage >&2
  exit 64
fi

need curl
need htmlq
need pandoc

# Reject option combinations that contradict each other. The checks run in a
# fixed order, so a given combination always yields the same message.
if ((list_sections)) && ((${#sections[@]} > 0)); then
  die "use either --list-sections or --section, not both"
fi

if ((search_mode)) && { ((list_sections)) || ((${#sections[@]} > 0)); }; then
  die "--search cannot be combined with --list-sections or --section"
fi

if ((featured_mode && random_mode)); then
  die "--featured cannot be combined with --random"
fi
if ((featured_mode && search_mode)); then
  die "--featured cannot be combined with --search"
fi

# --random together with --featured was already rejected above.
if ((random_mode && search_mode)); then
  die "--random cannot be combined with --search"
fi
if ((random_mode)) && ((${#sections[@]} > 0)); then
  die "--random cannot be combined with --section"
fi

if ((news_mode && featured_mode)); then
  die "--news cannot be combined with --featured"
fi
if ((news_mode && random_mode)); then
  die "--news cannot be combined with --random"
fi
if ((news_mode && search_mode)); then
  die "--news cannot be combined with --search"
fi
if ((news_mode)) && { ((list_sections)) || ((${#sections[@]} > 0)); }; then
  die "--news cannot be combined with --list-sections or --section"
fi

# Join the query words with single spaces. The "-" default keeps nounset from
# aborting on an empty array in bash releases before 4.4 (e.g. `wiki --random`).
query="${query_parts[*]-}"

if ((search_mode)) && [[ "$query" =~ ^https?:// ]]; then
  die "--search requires a search query, not a URL"
fi

# Scratch files: downloaded HTML, the extracted article fragment, and the
# rendered Markdown. The cleanup handler is installed before the first file is
# created, so a failure part-way through still removes what already exists.
tmp_html=""
tmp_fragment=""
tmp_markdown=""

# Remove whichever scratch files have been created so far.
cleanup_tmp_files() {
  local tmp_path
  for tmp_path in "$tmp_html" "$tmp_fragment" "$tmp_markdown"; do
    if [[ -n "$tmp_path" ]]; then
      rm -f -- "$tmp_path"
    fi
  done
}
trap cleanup_tmp_files EXIT

tmp_html="$(mktemp)"
tmp_fragment="$(mktemp)"
tmp_markdown="$(mktemp)"

# Download the page for the selected mode. page_url ends up holding the URL
# the content came from (after redirects where fetch reports it).
if ((featured_mode)); then
  page_url="$(get_featured_url)"
  fetch "$page_url" "$tmp_html" >/dev/null
elif ((random_mode)); then
  # Special:Random redirects; keep the effective URL of the article reached.
  page_url="$(fetch "https://en.wikipedia.org/wiki/Special:Random" "$tmp_html")"
elif ((news_mode)); then
  page_url="https://en.wikipedia.org/wiki/Portal:Current_events"
  fetch "$page_url" "$tmp_html" >/dev/null
elif [[ "$query" =~ ^https?:// ]]; then
  page_url="$(fetch "$query" "$tmp_html")"
else
  if ((search_mode)); then
    # fulltext=1 asks for the result list.
    search_url="https://en.wikipedia.org/wiki/Special:Search?search=$(urlencode "$query")&fulltext=1"
  else
    # go=Go asks for the "Go" search action; whether a results page came back
    # instead of an article is checked further down.
    search_url="https://en.wikipedia.org/wiki/Special:Search?search=$(urlencode "$query")&go=Go&ns0=1"
  fi
  page_url="$(fetch "$search_url" "$tmp_html")"
fi

# --search stops here: list the hits and exit.
if ((search_mode)); then
  if ((raw)); then
    show_search_results "$tmp_html"
  else
    show_search_results "$tmp_html" | page_if_tty
  fi
  exit 0
fi

# Still on a search-results page: there was no exact title match, so follow
# the first hit instead.
if [[ "$page_url" == *"Special:Search"* || "$page_url" == *"/w/index.php?search="* ]]; then
  first_result="$(
    htmlq --attribute href '.mw-search-result-heading a' < "$tmp_html" 2>/dev/null | first_line
  )"
  [[ -n "$first_result" ]] || die "no article matched: $query"

  if [[ "$first_result" == /* ]]; then
    page_url="https://en.wikipedia.org${first_result}"
  else
    page_url="$first_result"
  fi

  fetch "$page_url" "$tmp_html" >/dev/null
fi

# Choose the CSS selector that isolates the content to convert.
if ((news_mode)); then
  selector='div.p-current-events-headlines'
else
  # Direct children of the parser output that carry article text: paragraphs,
  # section headings, lists, quotes and preformatted blocks.
  content_root='main #mw-content-text .mw-parser-output'
  selector=""
  for child in \
    'p' \
    '.mw-heading > h2' '.mw-heading > h3' '.mw-heading > h4' \
    '.mw-heading > h5' '.mw-heading > h6' \
    'ul' 'ol' 'dl' 'blockquote' 'pre'; do
    selector+="${selector:+, }${content_root} > ${child}"
  done
fi

htmlq --ignore-whitespace "$selector" < "$tmp_html" > "$tmp_fragment"
[[ -s "$tmp_fragment" ]] || die "could not extract article body from: $page_url"

if ((news_mode)); then
  title="Topics in the news"
else
  # Best effort: a missing heading simply leaves the title empty.
  title="$(htmlq --text '#firstHeading' < "$tmp_html" 2>/dev/null || true)"
fi

write_markdown "$tmp_markdown"

# Produce the requested view of the document on stdout: the table of
# contents, the selected sections, or the whole article.
emit_document() {
  if ((list_sections)); then
    write_section_list "$tmp_markdown"
  elif ((${#sections[@]} > 0)); then
    write_selected_sections "$tmp_markdown" "${sections[@]}"
  else
    cat "$tmp_markdown"
  fi
}

if [[ -n "$output" ]]; then
  # Stage the result first so a failure (e.g. "section not found") cannot
  # truncate an existing output file. tmp_fragment is free to reuse here:
  # pandoc has already consumed it in write_markdown.
  emit_document > "$tmp_fragment"
  cat "$tmp_fragment" > "$output"
elif ((raw)); then
  emit_document | page_if_tty
else
  emit_document | render_pretty
fi
