#!/usr/bin/env ruby

# The script takes exactly three positional arguments; anything else prints
# the usage line and terminates with a non-zero exit status.
unless ARGV.size == 3
  script_name = File.basename(__FILE__)
  puts "Usage: #{script_name} <ksy-dir> <target-dir> <html-dir>"
  exit(1)
end

require 'yaml'
require 'fileutils'
require 'erb'
require 'cgi'
require 'set'

require 'commonmarker'
require 'pygments'

# Builds the static HTML gallery for a tree of .ksy format specs: one output
# directory per format (an overview page plus one page per target language
# whose generated sources could be copied), and the site-wide index and
# cross-reference pages.
#
# Every page is rendered from ERB templates evaluated with the `binding` of the
# generating method. The templates are not part of this file, so method names,
# instance variables and the local variable / parameter names inside the
# generate_* methods may be referenced by them and must be kept stable.
class FormatPagesGenerator
  # Directory of this script; the *.html.erb templates are read from here.
  # Resolved when the class is loaded, i.e. before #run changes the working
  # directory, so a relative __FILE__ cannot be mis-resolved later.
  TEMPLATE_DIR = File.expand_path(File.dirname(__FILE__)).freeze

  # Canonical identity shared by the C++ flavours; its :id selects the usage
  # template (see #initialize).
  CPP_STL_CANONICAL = {id: 'cpp_stl', name: 'C++/STL'}.freeze

  # Known page kinds, keyed by language id. The id doubles as the name of the
  # per-language subdirectory and of the generated page.
  #   :name      - display name
  #   :lexer     - Pygments lexer to use instead of the id (see #code)
  #   :ext       - file extension of generated sources (see #source_file_names)
  #   :canonical - shared identity used to pick the usage template
  LANGS = {
    'index' => {name: 'Overview', lexer: 'yaml'},

    'cpp_stl_11' => {name: 'C++11/STL', lexer: 'cpp', canonical: CPP_STL_CANONICAL},
    'cpp_stl_98' => {name: 'C++98/STL', lexer: 'cpp', canonical: CPP_STL_CANONICAL},
    'csharp' => {name: 'C#', ext: 'cs'},
    'go' => {name: 'Go', ext: 'go'},
    'graphviz' => {name: 'GraphViz', ext: 'dot'},
    'java' => {name: 'Java', ext: 'java'},
    'javascript' => {name: 'JavaScript', ext: 'js'},
    'lua' => {name: 'Lua', ext: 'lua'},
    'nim' => {name: 'Nim', ext: 'nim'},
    'perl' => {name: 'Perl', ext: 'pm'},
    'php' => {name: 'PHP', ext: 'php'},
    'python' => {name: 'Python', ext: 'py'},
    'ruby' => {name: 'Ruby', ext: 'rb'},
  }.freeze

  # Category metadata (title and icon name). Not referenced by the Ruby code in
  # this class; presumably looked up by the templates -- TODO confirm.
  CATS = {
    '3d' => {title: '3D Models', icon: 'cube'},
    'archive' => {title: 'Archive Files', icon: 'archive'},
    'android' => {title: 'Android-specific', icon: 'android'},
    'cad' => {title: 'CAD', icon: 'space-shuttle'},
    'common' => {title: 'Commonly Used Data Types', icon: 'arrows'},
    'database' => {title: 'Databases', icon: 'database'},
    'dos' => {title: 'DOS-specific', icon: 'terminal'},
    'executable' => {title: 'Executables and Byte-code', icon: 'gears'},
    'filesystem' => {title: 'Filesystems', icon: 'hdd-o'},
    'firmware' => {title: 'Firmware', icon: 'microchip'},
    'font' => {title: 'Fonts', icon: 'font'},
    'game' => {title: 'Game Data Files', icon: 'gamepad'},
    'geospatial' => {title: 'Geospatial (Maps)', icon: 'map'},
    'hardware' => {title: 'Hardware Protocols', icon: 'microchip'},
    'image' => {title: 'Image Files', icon: 'file-image-o'},
    'linux' => {title: 'GNU/Linux-specific', icon: 'linux'},
    'log' => {title: 'Logs', icon: 'database'},
    'machine_code' => {title: 'CPU / Machine Code Disassembly', icon: 'industry'},
    'macos' => {title: 'macOS-specific', icon: 'apple'},
    'media' => {title: 'Multimedia Files', icon: 'video-camera'},
    'network' => {title: 'Networking Protocols', icon: 'sitemap'},
    'scientific' => {title: 'Scientific Applications', icon: 'flask'},
    'security' => {title: 'Security', icon: 'lock'},
    'serialization' => {title: 'Serialization Protocols'},
    'windows' => {title: 'Windows-specific', icon: 'windows'},
  }.freeze

  # License identifiers that do not trigger a warning in #generate_base_page.
  VALID_LICENSES = Set.new([
    'CC0-1.0',
    'MIT',
    'Unlicense',
    'WTFPL',
  ]).freeze

  # Syntax-highlights +src+ with Pygments.
  #
  # The lexer is the :lexer override from LANGS when there is one, otherwise
  # +lang+ itself. A +lang+ that is not a key of LANGS is passed to Pygments
  # as the lexer name instead of raising NoMethodError.
  #
  # @param lang [String] language id or Pygments lexer name
  # @param src [String] source text to highlight
  # @return whatever Pygments.highlight returns
  def code(lang, src)
    lexer = LANGS.fetch(lang, {})[:lexer] || lang
    Pygments.highlight(src, :lexer => lexer)
  end

  # Resolves the three working directories, writes the Pygments stylesheet
  # into the HTML directory and loads all ERB templates.
  #
  # @param ksy_dir [String] root of the .ksy tree (must exist)
  # @param target_dir [String] root of the generated sources, one
  #   subdirectory per language (must exist)
  # @param html_dir [String] output directory (created if missing)
  def initialize(ksy_dir, target_dir, html_dir)
    @ksy_dir = File.realpath(ksy_dir)
    @target_dir = File.realpath(target_dir)
    FileUtils.mkdir_p(html_dir)
    @html_dir = File.realpath(html_dir)

    File.write("#{@html_dir}/pygments-default.css", Pygments.css('.highlight'))

    @erb_header = erb('header')
    @erb_fheader = erb('format_header')
    @erb_base = erb('format_base')
    @erb_index = erb('format_index')
    @erb_lang = erb('format_lang')
    @erb_xref = erb('format_xref')
    @erb_footer = erb('footer')

    # Usage templates are optional and looked up per language; languages with
    # a :canonical entry share the template of their canonical id.
    @erb_usage = {}
    LANGS.each_pair do |lang, info|
      lang_prim = info.fetch(:canonical, {})[:id] || lang
      fn = "usage_#{lang_prim}"
      # Check the same location #erb reads from (the script directory), not
      # the current working directory.
      @erb_usage[lang] = erb(fn) if File.exist?(template_path(fn))
    end

    @base_url = '//kaitai.io/'

    @all = {}
    @by_cat = {}
  end

  # Processes every .ksy file below the ksy directory, then writes the global
  # index and cross-reference pages.
  #
  # Changes the process working directory to the ksy directory. Files that
  # fail to parse as YAML are reported on stderr and skipped.
  def run
    Dir.chdir(@ksy_dir)
    # Sorted so that the output does not depend on filesystem enumeration order.
    Dir.glob("**/*.ksy").sort.each do |ksy|
      begin
        process_ksy(ksy)
      rescue Psych::SyntaxError => e
        $stderr.puts "## on file #{ksy.inspect}"
        $stderr.puts e.inspect
      end
    end

    generate_index
    generate_xref
  end

  # Generates all pages for a single spec and records it in @all / @by_cat.
  #
  # @param ksy [String] path of the spec, relative to the ksy directory (which
  #   is the working directory at this point)
  def process_ksy(ksy)
    yaml_str = File.read(ksy)
    # NOTE(review): on Psych < 4 YAML.load can instantiate arbitrary objects;
    # consider YAML.safe_load if the .ksy tree is not fully trusted.
    yaml = YAML.load(yaml_str)

    meta = yaml.is_a?(Hash) ? yaml['meta'] : nil
    file_id = meta['id'] if meta.is_a?(Hash)
    unless file_id
      # Same best-effort policy as for YAML syntax errors: report and go on.
      $stderr.puts "## on file #{ksy.inspect}"
      $stderr.puts "no meta/id found, skipping"
      return
    end

    file_dir = "#{@html_dir}/#{file_id}"
    # Category is the first component of the spec's relative directory.
    cat_id = File.dirname(ksy).gsub(/\/.*$/, '')

    # Array() tolerates a missing or scalar `tags` value.
    file_tags = Set.new(Array(meta['tags']))
    file_tags << cat_id

    puts
    puts "## #{file_id}"
    puts

    FileUtils.mkdir_p(file_dir)

    # 'index' is always available; a real language only when all of its
    # generated sources could be copied.
    good_langs = ['index']
    LANGS.each_key do |lang|
      next if lang == 'index'
      good_langs << lang if copy_target_to_html(file_id, "#{file_dir}/src", lang)
    end

    good_langs.each do |lang|
      # The 'index' page is index.html, which generate_base_page writes below;
      # rendering it here as a language page would only be overwritten.
      next if lang == 'index'
      generate_lang_page(lang, file_id, file_dir, yaml, yaml_str, cat_id, good_langs)
    end

    generate_base_page(file_id, file_dir, yaml, yaml_str, cat_id, good_langs)

    @all[file_id] = yaml
    by_cat_record = {
      :id => file_id,
      :ksy => ksy,
    }

    file_tags.each { |tag| (@by_cat[tag] ||= []) << by_cat_record }
  end

  # Writes the overview page <out_dir>/index.html for one format and warns on
  # stderr when the spec has no license or one outside VALID_LICENSES.
  #
  # All parameters and locals are exposed to the templates through `binding`,
  # including the ones this method does not use itself.
  def generate_base_page(file_id, out_dir, yaml, yaml_str, cat_id, good_langs)
    File.open("#{out_dir}/index.html", 'w') { |out|
      meta = yaml['meta']

      license = meta['license']
      if license
        warn "#{file_id}: license #{license.inspect} does not look valid" unless VALID_LICENSES.include?(license)
      else
        warn "#{file_id}: no license"
      end

      format_name = get_format_name(meta)
      cur_lang = 'index'
      page_title = format_name + " format spec for Kaitai Struct"
      out.write @erb_header.result(binding)
      out.write @erb_fheader.result(binding)
      out.write @erb_base.result(binding)
      out.write @erb_footer.result(binding)
    }
  end

  # Writes <out_dir>/<cur_lang>.html for one format and one language.
  #
  # The generated sources are read back from <out_dir>/src/<cur_lang>/ (where
  # #copy_target_to_html put them); files that are missing are dropped from
  # `src_files`. All parameters and locals are exposed to the templates
  # through `binding`.
  def generate_lang_page(cur_lang, file_id, out_dir, yaml, yaml_str, cat_id, good_langs)
    puts "  * generating #{cur_lang} page for #{file_id}"
    File.open("#{out_dir}/#{cur_lang}.html", 'w') { |out|
      meta = yaml['meta']
      format_name = get_format_name(meta)
      page_title = "#{format_name}: " + (if cur_lang == 'graphviz'
        "GraphViz block diagram (.dot) source: Kaitai Struct"
      else
        "#{LANGS[cur_lang][:name]} parsing library"
      end)

      src_files = source_file_names(file_id, cur_lang).map { |fn|
        path = "src/#{cur_lang}/#{fn}"
        begin
          puts "    * reading source file #{out_dir}/#{path}"
          src = File.read("#{out_dir}/#{path}")
        rescue Errno::ENOENT
          puts "      ... not found!"
          src = nil
        end
        {
          :filename => fn,
          :path => path,
          :src => src,
        }
      }.reject { |x| x[:src].nil? }

      usage_attrs = get_usage_attrs(yaml)

      out.write @erb_header.result(binding)
      out.write @erb_fheader.result(binding)
      out.write @erb_lang.result(binding)
      out.write @erb_footer.result(binding)
    }
  end

  # Writes the gallery front page <html_dir>/index.html.
  def generate_index
    File.open("#{@html_dir}/index.html", 'w') { |out|
      page_title = "File Format Gallery for Kaitai Struct"
      out.write @erb_header.result(binding)
      out.write @erb_index.result(binding)
      out.write @erb_footer.result(binding)
    }
  end

  # Writes the cross-reference page <html_dir>/xref.html.
  def generate_xref
    File.open("#{@html_dir}/xref.html", 'w') { |out|
      page_title = "File Format Cross-References for Kaitai Struct"
      out.write @erb_header.result(binding)
      out.write @erb_xref.result(binding)
      out.write @erb_footer.result(binding)
    }
  end

  # Picks up to two top-level attributes to show in usage examples.
  #
  # Preference order: the first seq attribute with both `id` and `doc` plus
  # the first instance with `doc`; if neither exists, the first seq attribute
  # with an `id`; if that does not exist either, the first instance.
  #
  # @param yaml [Hash] parsed spec
  # @return [Array<Hash>] entries as built by #usage_attr (possibly empty)
  def get_usage_attrs(yaml)
    seq = yaml['seq'] || []
    instances = yaml['instances'] || {}
    res = []

    documented_seq = seq.find { |attr| attr['id'] && attr['doc'] }
    res << usage_attr(documented_seq['id'], documented_seq) if documented_seq

    # Hash#find returns a [name, attr] pair.
    documented_inst = instances.find { |_name, attr| attr['doc'] }
    res << usage_attr(*documented_inst) if documented_inst

    if res.empty?
      any_seq = seq.find { |attr| attr['id'] }
      res << usage_attr(any_seq['id'], any_seq) if any_seq
    end

    if res.empty?
      any_inst = instances.first
      res << usage_attr(*any_inst) if any_inst
    end

    res
  end

  # Builds one usage entry; without a `doc` the description falls back to
  # "get <name with underscores replaced by spaces>".
  #
  # @return [Hash] with keys :name and :doc
  def usage_attr(name, attr)
    {
      :name => name,
      :doc => attr['doc'] || "get #{name.tr('_', ' ')}",
    }
  end

  # Copies the generated sources of +file_id+ for +lang+ from the target
  # directory into <out_dir>/<lang>/ (created if necessary).
  #
  # @return [Boolean] false as soon as one expected source file is missing
  #   (files copied before that stay in place), true otherwise
  def copy_target_to_html(file_id, out_dir, lang)
    tgt_dir = "#{out_dir}/#{lang}"
    FileUtils.mkdir_p(tgt_dir)

    source_file_gen_names(file_id, lang).each do |tgt_file|
      src_path = "#{@target_dir}/#{lang}/#{tgt_file}"
      dst_path = "#{tgt_dir}/#{File.basename(tgt_file)}"
      puts "  * copying #{src_path} => #{dst_path}"
      begin
        FileUtils.cp(src_path, dst_path)
      rescue Errno::ENOENT
        $stderr.puts "#{src_path}: file not found"
        return false
      end
    end
    true
  end

  # Names of the generated sources relative to <target_dir>/<lang>/: the same
  # as #source_file_names, except that Go and Java files live under `src/`.
  def source_file_gen_names(file_id, lang)
    src_files = source_file_names(file_id, lang)
    case lang
    when 'go', 'java'
      src_files.map { |file| "src/#{file}" }
    else
      src_files
    end
  end

  # Base names of the source files generated for +file_id+ in +lang+.
  #
  # @return [Array<String>] empty for ids without sources (e.g. 'index')
  def source_file_names(file_id, lang)
    case lang
    when 'cpp_stl_98', 'cpp_stl_11'
      ["#{file_id}.h", "#{file_id}.cpp"]
    when 'csharp', 'java', 'javascript', 'perl', 'php'
      # These targets name the file after the upper-camel-case class name.
      ["#{ucc(file_id)}.#{LANGS[lang][:ext]}"]
    when 'go', 'graphviz', 'lua', 'nim', 'ruby', 'python'
      ["#{file_id}.#{LANGS[lang][:ext]}"]
    else
      []
    end
  end

  # Human-readable name of a format: `title` if present, otherwise a name
  # derived from `file-extension` (and `application`, which may be a single
  # string or a list), otherwise the bare `id`.
  #
  # @param meta [Hash] the `meta` section of a spec
  # @return [String, nil]
  def get_format_name(meta)
    title = meta['title']
    return title if title

    ext = meta['file-extension']
    if ext
      name = readable_exts(ext)
      apps = Array(meta['application'])
      name << " of #{apps.join(', ')}" unless apps.empty?
      return name
    end

    meta['id']
  end

  # Formats one extension or a list of extensions, e.g. ".jpg / .jpeg file format".
  #
  # @param ext [String, Array<String>]
  # @return [String] a new, unfrozen string
  def readable_exts(ext)
    Array(ext).map { |x| ".#{x}" }.join(' / ') + " file format"
  end

  # snake_case -> UpperCamelCase
  def ucc(name)
    name.split('_').map(&:capitalize).join
  end

  # snake_case -> lowerCamelCase; an empty name yields an empty string.
  def lcc(name)
    head, *tail = name.split('_')
    "#{head}#{tail.map(&:capitalize).join}"
  end

  # URL of an external cross-reference, or nil when the reference kind has no
  # linkable page ('iso').
  #
  # @raise [RuntimeError] for an unknown reference kind
  def xref_url(name, value)
    case name
    when 'iso'
      nil
    when 'forensicswiki'
      "https://forensicswiki.xyz/page/#{value}"
    when 'justsolve'
      "http://fileformats.archiveteam.org/wiki/#{value}"
    when 'loc'
      "https://www.loc.gov/preservation/digital/formats/fdd/#{value}.shtml"
    when 'mime'
      "https://www.iana.org/assignments/media-types/#{value}"
    when 'pronom'
      "https://www.nationalarchives.gov.uk/pronom/#{value}"
    when 'rfc'
      "https://tools.ietf.org/html/rfc#{value}"
    when 'wikidata'
      "https://www.wikidata.org/wiki/#{value}"
    else
      raise "Invalid xref URL for #{name.inspect} requested"
    end
  end

  # Plain-text title of a cross-reference (not HTML-escaped).
  #
  # @raise [RuntimeError] for an unknown reference kind
  def xref_title(name, value)
    v = xref_value(name, value)
    case name
    when 'forensicswiki'
      "#{v} in ForensicsWiki"
    when 'iso'
      "ISO/IEC #{v}"
    when 'justsolve'
      "#{v} in Just Solve the File Format Problem"
    when 'loc'
      "LOC #{v}"
    when 'mime'
      # Kept in step with xref_url / xref_value, which both accept 'mime'.
      "MIME type #{v}"
    when 'pronom'
      "PRONOM #{v}"
    when 'rfc'
      "RFC #{v}"
    when 'wikidata'
      "Wikidata #{v}"
    else
      raise "Invalid xref name for #{name.inspect} requested"
    end
  end

  # Display form of a cross-reference value: wiki page names get their
  # underscores replaced by spaces, everything else is returned unchanged.
  #
  # @raise [RuntimeError] for an unknown reference kind
  def xref_value(name, value)
    case name
    when 'forensicswiki', 'justsolve'
      value.to_s.tr('_', ' ')
    when 'iso', 'loc', 'mime', 'pronom', 'rfc', 'wikidata'
      value
    else
      raise "Invalid xref value for #{name.inspect} requested"
    end
  end

  # Inline HTML for a cross-reference: a link when the kind has a URL, plain
  # text otherwise; a list of values is joined with ", ". Text and URL are
  # HTML-escaped (values come straight from the spec and may be Integers or
  # contain characters such as `&`).
  #
  # @return [String, nil] nil when +value+ is nil/false
  def xref_a(name, value)
    return nil unless value
    return value.map { |v| xref_a(name, v) }.join(", ") if value.is_a?(Array)

    url = xref_url(name, value)
    text = ERB::Util.html_escape(xref_value(name, value))
    url ? "<a href=\"#{ERB::Util.html_escape(url)}\">#{text}</a>" : text
  end

  # Like #xref_a, but renders `<li>` items showing #xref_title; a list of
  # values yields one item per line.
  #
  # @return [String, nil] nil when +value+ is nil/false
  def xref_li(name, value)
    return nil unless value
    return value.map { |v| xref_li(name, v) }.join("\n") if value.is_a?(Array)

    url = xref_url(name, value)
    title = ERB::Util.html_escape(xref_title(name, value))
    if url
      "<li><a href=\"#{ERB::Util.html_escape(url)}\">#{title}</a></li>"
    else
      "<li>#{title}</li>"
    end
  end

  # Loads the template <name>.html.erb from the script directory.
  #
  # Each template gets its own output variable (`_erbout_<name>`), so several
  # templates can be rendered against the same binding. Keyword arguments are
  # used because the positional form of ERB.new is deprecated since Ruby 2.6.
  #
  # @return [ERB]
  def erb(name)
    ERB.new(File.read(template_path(name)), eoutvar: "_erbout_#{name}")
  end

  # Absolute path of the template called +name+.
  def template_path(name)
    File.join(TEMPLATE_DIR, "#{name}.html.erb")
  end
end

# Entry point: build the generator from the three command-line arguments
# (ksy dir, target dir, html dir) and produce the whole gallery.
ksy_dir, target_dir, html_dir = ARGV[0], ARGV[1], ARGV[2]
FormatPagesGenerator.new(ksy_dir, target_dir, html_dir).run
