{# Tagged and Tokenized Output

   Walks tag_data (tags[1]) in order. For each tag it emits a wrapper element
   containing the tokens in tokens[tag.index_start .. tag.index_end], then
   includes "_tokens_s.html" for the gap before the next tag.

   Context read by this template:
     tags    - 2-item sequence; tags[0] is bound to tag_rules, tags[1] to tag_data
     tokens  - sequence sliced by each tag's index_start / index_end
     s       - only tested for being defined here
     token_type_index, token_types, token_strs_index,
     token_str_to_output_index, token_whitespace_newline_str_to_output_index
             - used to pick the string emitted for each token; these are NOT
               covered by the guard below

   The "do" statement used below needs Jinja's expression-statement extension
   (jinja2.ext.do) to be enabled on the environment.

   NOTE(review): several variables set here are not read again in this file
   (tag_rules, next_pos, rules, untagged_tokens_index_start/_end). They are
   presumably consumed by the included template, so none were renamed or
   removed - confirm before cleaning up.
#}
{% if tags is defined and tags|length == 2 and tokens is defined and s is defined %}
    {% set tag_rules = tags[0] %}
    {% set tag_data = tags[1] %}

    {# TODO: Output untagged tokens and/or untokenized chars BEFORE first tag!!! #}

    {% for tag_index in range(0, tag_data|length) %}
        {% set tag = tag_data[tag_index] %}
        {% set next_pos = tag.pos_end + tag.token_end_len %}

        {# Begin tag opening wrapper #}
        {# Collect the first item of every rule tuple attached to this tag. #}
        {% set rules = [] %}
        {% for rule_tuple in tag["rules"] %}
            {% do rules.append(rule_tuple[0]) %}
        {% endfor %}
        {# The inner loop has ended, so "loop" is the tag loop again: ids are 1-based. #}
        {% set tag_id = "tag_" + loop.index|string %}
        {# NOTE(review): the start of this element was missing; only the text from
           "0 %" up to the closing quote and angle bracket survived, which left an
           endif without its if. The condition is restored from the identical test
           in the disabled topic-model hack below. The element name (span) and the
           id attribute are assumptions - confirm against the original markup,
           including wherever "rules" was emitted. #}
        <span id="{{ tag_id }}" class="{% if "classes" in tag and tag["classes"]|length > 0 %}{{ tag["classes"]|join(" ") }}{% endif %}">
        {# End tag opening wrapper #}

        {# Crazy topic model display hack #}
        {#{% if "classes" in tag and tag["classes"]|length > 0 %} {% for class in tag["classes"] %}{% endfor %} {% endif %}#}

        {# Output tagged tokens mapped to this tag. #}
        {% for token in tokens[tag.index_start:tag.index_end + 1] %}
            {# Whitespace and newline tokens are emitted from a different slot than other tokens. #}
            {% if token[token_type_index] == token_types["WHITESPACE"] or token[token_type_index] == token_types["NEWLINE"] %}
                {% set token_str = token[token_strs_index][token_whitespace_newline_str_to_output_index] %}
            {% else %}
                {% set token_str = token[token_strs_index][token_str_to_output_index] %}
            {% endif %}
            {{ token_str }}
        {% endfor %}

        {# Begin tag closing wrapper #}
        {# NOTE(review): closes the reconstructed opening element above; keep the two in sync. #}
        </span>
        {# End tag closing wrapper #}

        {# There could be untokenized chars in between these tag_data! #}
        {# Try outputting untagged tokens and/or untokenized chars in between this tag and the next_tag. #}
        {% set next_tag_index = tag_index + 1 %}
        {% if next_tag_index < tag_data|length %}
            {# Cool, there is a next tag #}
            {% set next_tag = tag_data[next_tag_index] %}

            {# Untagged token indexes #}
            {% set untagged_tokens_index_start = tag.index_end + 1 %}
            {% set untagged_tokens_index_end = next_tag.index_start - 1 %}

            {% include "_tokens_s.html" %}

        {# {% else %}#}
            {# TODO: Fix missing str output #}
            {# There could *still* be untokenized chars in between these tag_data! #}
            {# next_pos may have been updated! #}
            {# Untokenized char byte positions, i.e. indexes into the str, s. #}
            {# {% set untokenized_chars_pos_start = next_pos %}#}
            {# {% set untokenized_chars_pos_end = next_tag.pos_start - 1 %}#}
            {##}
            {# {% include "__s.html" %}#}
        {% endif %}
    {% endfor %}
{% endif %}