import plotly.graph_objects as go
import plotly.io as pio
import json
import textwrap

# Read the precomputed sentence-matching results (UTF-8 encoded JSON)
with open("sentence_match_data.json", encoding="utf-8") as f:
    data = json.load(f)

# Sentence texts per year; looked up by the indices in matched_indices
sentences_2019 = data["sentences_2019"]
sentences_2020 = data["sentences_2020"]

# Sentences without a counterpart; consumed below as (index, sentence) pairs
unmatched_2019 = data["unmatched_2019"]
unmatched_2020 = data["unmatched_2020"]

# JSON arrays arrive as lists, so turn every index pair into a tuple
matched_indices = list(map(tuple, data["matched_indices"]))

def wrap_text(text, width=60):
    """Re-flow ``text`` for use as hover text.

    The text is split with ``textwrap.wrap`` into lines no longer than
    ``width`` characters, and the lines are joined with ``<br>`` so they
    render as separate lines in a hover label.

    Args:
        text: The string to wrap.
        width: Maximum length of each wrapped line (default 60).

    Returns:
        The wrapped lines joined by ``<br>``; an empty string when
        ``text`` yields no lines.
    """
    wrapped_lines = textwrap.wrap(text, width)
    return "<br>".join(wrapped_lines)

fig = go.Figure()

# Matched sentence pairs.
# Every pair is a short line from (x=0, y=i) on the 2019 side to (x=1, y=j)
# on the 2020 side. All pairs are drawn by ONE trace instead of one trace per
# pair: each pair contributes its two endpoints followed by a None entry,
# and the None breaks the line so consecutive pairs are not joined together.
# The rendered picture and hover text are the same, but the figure no longer
# carries a separate trace object for every match.
matched_x = []
matched_y = []
matched_text = []
for i, j in matched_indices:
    matched_x += [0, 1, None]
    matched_y += [i, j, None]
    matched_text += [
        wrap_text(sentences_2019[i]),
        wrap_text(sentences_2020[j]),
        None,  # placeholder so text stays aligned with x/y
    ]

# As before, nothing is added to the figure when there are no matches
if matched_x:
    fig.add_trace(go.Scatter(
        x=matched_x,
        y=matched_y,
        mode='lines+markers',
        connectgaps=False,  # keep the None separators as gaps between pairs
        line=dict(width=1, color='#333'),
        marker=dict(size=6, color='green'),
        text=matched_text,
        hoverinfo='text',
        showlegend=False
    ))

# Unmatched sentences: a single markers-only trace with the 2019 entries at
# x=0 followed by the 2020 entries at x=1. Each entry is unpacked as an
# (index, sentence) pair.
unmatched_points = (
    [(0, idx, sentence) for idx, sentence in unmatched_2019]
    + [(1, idx, sentence) for idx, sentence in unmatched_2020]
)

unmatched_trace = go.Scatter(
    x=[x_pos for x_pos, _, _ in unmatched_points],
    y=[idx for _, idx, _ in unmatched_points],
    mode='markers',
    marker={'size': 6, 'color': 'red'},
    text=[wrap_text(sentence) for _, _, sentence in unmatched_points],
    hoverinfo='text',
    name='Unmatched sentence',
)
fig.add_trace(unmatched_trace)

# Horizontal axis: only two positions, labelled with the years
year_axis = {
    'tickvals': [0, 1],
    'ticktext': ['2019', '2020'],
    'title': 'Year',
    'showgrid': False,
    'zeroline': False,
}

# Vertical axis: sentence index, reversed so index 0 sits at the top
index_axis = {
    'title': 'Sentence Index',
    'dtick': 20,
    'showgrid': False,
    'zeroline': False,
    'autorange': 'reversed',
}

# Apply the layout. The font family should match the name of the
# system-installed IBM Plex Serif font.
fig.update_layout(
    title="",
    plot_bgcolor='#f0f0f0',
    font={'family': 'IBM Plex Serif'},
    xaxis=year_axis,
    yaxis=index_axis,
    hovermode='closest',
    height=1100,
    showlegend=False,
    margin={'t': 20, 'b': 0, 'l': 60, 'r': 60},
)

# Save as interactive HTML. A stylesheet link for IBM Plex Serif is injected
# into the page head so the browser can load the web font.
html_str = pio.to_html(fig, full_html=True, include_plotlyjs='cdn')
font_link = '<link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Serif&display=swap" rel="stylesheet">'
# count=1: only the first <head> tag is touched, never any later occurrence
# of that text further down the document.
html_str = html_str.replace('<head>', f'<head>\n    {font_link}', 1)
with open("sentence_matches.html", "w", encoding="utf-8") as f:
    f.write(html_str)

# Static exports (these use the system-installed font).
# The PNG used to be rendered three times to the same path (scale=3, then
# scale=2 twice), so the scale=3 render was always overwritten. It is now
# rendered once at scale=2, which is the file that ended up on disk.
# Raise the scale here if a higher-resolution PNG is wanted.
fig.write_image("sentence_matches.png", scale=2)
fig.write_image("sentence_matches.svg")

# Show interactive plot in notebook (done last so every file is already
# written by the time the figure is displayed)
fig.show()