import runPythonAsync from '../../pyodide/py-worker';

/**
 * Report the time zone that the runtime's default `Intl.DateTimeFormat`
 * resolves to.
 *
 * @returns {string} The `timeZone` field of the resolved formatter options.
 */
const getInstagramTimezoneCode = () => {
  const { timeZone } = Intl.DateTimeFormat().resolvedOptions();
  return timeZone;
};


/**
 * Parse a mailbox file into two gzip-compressed, base64-encoded CSV tables by
 * running an embedded Python script through `runPythonAsync`.
 *
 * The script iterates over every message yielded by `iter_mbox`, keeps a
 * body-stripped copy of each message it can parse, and additionally extracts
 * an order summary row (date, order number, total, shipment count) from
 * messages that `is_confirmation_email` accepts. Messages or confirmation
 * emails that fail to parse are logged and skipped rather than aborting the
 * whole run.
 *
 * @param {*} fileObj - Handed to the Python side as `js.fileObj` and read line
 *   by line via `iter_js_file_lines`. Presumably a browser File/Blob holding
 *   an mbox export -- TODO confirm against the caller.
 * @returns {Promise<{amazon_order_csv_b64: (string|null), email_csv_b64: string}>}
 *   The parsed JSON produced by the script. `amazon_order_csv_b64` is `null`
 *   when no order summaries were extracted. Assumes `runPythonAsync` resolves
 *   to the value of the script's final expression (a JSON string).
 */
const parseEmailsToTables = async (fileObj) => {
  const result = await runPythonAsync(
    `
    import base64
    import gzip
    import js
    import json
    import logging
    import sys
    from itertools import islice

    import pandas as pd

    from stalkmyself.email import ParsedEmail, iter_mbox, make_email_dataframe
    from stalkmyself.email.amazon_emails import CONFIRMATION_ADDRESS, parse_confirmation_summary, is_confirmation_email, get_orders_df
    from stalkmyself.utils import iter_js_file_lines

    LOG = logging.getLogger(__name__)

    log_k = 300
    parsed_messages = []
    amazon_confirmation_summaries = []
    mbox = iter_js_file_lines(js.fileObj, log_every_k=100_000)
    for i, m in enumerate(iter_mbox(mbox)):
        try:
            msg = ParsedEmail.from_email_message(m)
            if is_confirmation_email(msg):
                try:
                    for order in parse_confirmation_summary(msg.body_content_html):
                        # Drop the currency symbol and any thousands separators
                        # ("$1,234.56") so that large totals convert cleanly
                        # instead of raising ValueError and being skipped.
                        order_total = float(order.order_total.strip("$").replace(",", ""))
                        amazon_confirmation_summaries.append(
                            {
                                "date": msg.headers_standard.date,
                                "order_number": order.order_number,
                                "order_total": order_total,
                                "n_shipments": len(order.shipments),
                            }
                        )
                except Exception as exc:
                    LOG.warning(f"Skipping Amazon order confirmation email due to parsing error: {exc!r}")
            parsed_messages.append(msg.strip_body())
        except Exception as exc:
            LOG.warning(f"Could not parse message {i}, skipping: {exc!r}")
        # Progress log once every log_k messages.
        if i % log_k == log_k - 1:
            LOG.info(f"Read {i+1:7,d} messages")
    df_amazon = None
    if len(amazon_confirmation_summaries) > 0:
        df_amazon = pd.DataFrame(amazon_confirmation_summaries).sort_values(by="date")
    df_emails = make_email_dataframe(parsed_messages, include_body=False)
    # Each table is serialised as CSV -> gzip -> base64 text so it can be
    # embedded in the JSON result below.
    amazon_res = None
    if df_amazon is not None:
        amazon_res = base64.b64encode(gzip.compress(df_amazon.to_csv(index=False).encode("utf-8", errors="replace"))).decode("utf-8")
    emails_str_bytes = df_emails.to_csv(index=False).encode("utf-8", errors="replace")
    emails_res = base64.b64encode(gzip.compress(emails_str_bytes)).decode("utf-8")
    LOG.info(f"Email res takes {sys.getsizeof(emails_res) / 2 ** 20:.4f}MiB")
    json.dumps(
        {
            "amazon_order_csv_b64": amazon_res,
            "email_csv_b64": emails_res,
        },
    )
    `,
    { fileObj }
  );
  return JSON.parse(result);
};


/**
 * Build a list of chart results from the encoded email (and optionally Amazon
 * order) tables by running an embedded Python script through
 * `runPythonAsync`.
 *
 * The script decodes each table (base64 -> gzip -> CSV), converts the date
 * columns to the time zone returned by `getInstagramTimezoneCode`, restricts
 * the data to the `last_n_days` days ending the day before the newest email,
 * and renders up to five figures: Amazon cumulative spending, and received /
 * sent email breakdowns by weekday and by six-hour block. A figure is only
 * included when its plotting helper returns a non-None figure.
 *
 * @param {string} emailCSV - Base64 text of a gzip-compressed CSV; the script
 *   reads its `date`, `to_address`, `is_spam` and `is_sent` columns.
 * @param {?string} amazonCSV - Base64 text of a gzip-compressed CSV with a
 *   `date` column, or a value the Python side sees as `None` to skip the
 *   Amazon chart.
 * @returns {Promise<Array<{title: string, description: string, image_data_url: *}>>}
 *   One entry per rendered figure, in the order listed above. Assumes
 *   `runPythonAsync` resolves to the value of the script's final expression
 *   (a JSON string).
 */
const analyzeEmailTables = async (emailCSV, amazonCSV) => {
  const tzCode = getInstagramTimezoneCode();
  const result = await runPythonAsync(
    `
    import base64
    import gzip
    import io
    import js
    import json
    import logging
    from datetime import timedelta

    import pandas as pd

    from stalkmyself.email.amazon_emails import plot_cumulative_spending
    from stalkmyself.utils import donut_plot, matplotlib_save_to_data_url

    LOG = logging.getLogger(__name__)

    timezone_string = js.tzCode
    last_n_days = 180

    email_bytes = gzip.decompress(base64.b64decode(js.emailCSV.encode("utf-8")))
    df_emails = pd.read_csv(io.StringIO(email_bytes.decode("utf-8")))
    # utc=True normalises mixed UTC offsets (and naive values) to one tz-aware
    # dtype so that the .dt accessor and tz_convert below always apply.
    df_emails["date"] = pd.to_datetime(df_emails["date"], utc=True).dt.tz_convert(timezone_string)
    # The most frequent recipient address is used as the username on the plots.
    to_email = df_emails["to_address"].value_counts().index[0]

    # Analysis window: last_n_days days ending the day before the newest email.
    latest_datetime = df_emails["date"].max()
    last_date = latest_datetime.date() - timedelta(days=1)
    first_date = latest_datetime.date() - timedelta(days=last_n_days)
    date_mask_emails = df_emails["date"].dt.date.between(first_date, last_date)
    is_not_spam_sent = ~df_emails["is_spam"] & ~df_emails["is_sent"]
    is_sent = df_emails["is_sent"]
    df_emails_recieved = df_emails.loc[is_not_spam_sent & date_mask_emails]
    df_emails_sent = df_emails.loc[is_sent & date_mask_emails]
    hour_block_names = ["12am-6am", "6am-12pm", "12pm-6pm", "6pm-12am"]
    weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

    result_list = []

    amazon_spending_fig_url = None
    if js.amazonCSV is not None:
        amazon_bytes = gzip.decompress(base64.b64decode(js.amazonCSV.encode("utf-8")))
        df_amazon = pd.read_csv(io.StringIO(amazon_bytes.decode("utf-8")))
        df_amazon["date"] = pd.to_datetime(df_amazon["date"], utc=True).dt.tz_convert(timezone_string)
        date_mask_amazon = df_amazon["date"].dt.date.between(first_date, last_date)
        amazon_spending_title = f"Amazon Shopping ({first_date.isoformat()} to {last_date.isoformat()})"
        amazon_spending_fig = plot_cumulative_spending(df_amazon[date_mask_amazon].set_index("date"), title=amazon_spending_title)
        if amazon_spending_fig is not None:
            amazon_spending_fig_url = matplotlib_save_to_data_url(amazon_spending_fig)
            amazon_spending_fig.clf()
            result_list.append({
                "title": "Amazon Spending",
                "description": "We read your Amazon receipts and here's what we found. Note that this doesn't consider returns, and that purchases made with gift cards count as zero in your receipts.",
                "image_data_url": amazon_spending_fig_url,
            })
        else:
            LOG.warning("Had Amazon CSV passed in, but failed to make a plot!")

    # Received emails, grouped by weekday.
    weekday_counts = df_emails_recieved.date.dt.weekday.value_counts().sort_index()
    weekday_counts.index = [weekday_names[i] for i in weekday_counts.index]
    weekday_received_fig = donut_plot(
        weekday_counts,
        title="Email Received by Weekday",
        username=to_email,
        first_day=first_date,
        last_day=last_date,
    )
    if weekday_received_fig is not None:
        weekday_received_fig_url = matplotlib_save_to_data_url(weekday_received_fig)
        weekday_received_fig.clf()
        result_list.append(
            {
                "title": "Weekday Received Emails",
                "description": f"Here's the breakdown of when during the week you got your emails in the most recent {last_n_days} days (timezone: {timezone_string})",
                "image_data_url": weekday_received_fig_url,
            },
        )

    # Received emails, grouped into four six-hour blocks of the day.
    hour_block = pd.cut(
        df_emails_recieved["date"].dt.hour,
        range(0, 25, 6),
        right=False,
        labels=hour_block_names,
    )
    hour_block_counts = hour_block.value_counts().sort_index()
    time_received_fig = donut_plot(
        hour_block_counts,
        title="Email Received by Time",
        username=to_email,
        first_day=first_date,
        last_day=last_date,
        label_fontsize=8
    )
    if time_received_fig is not None:
        time_received_fig_url = matplotlib_save_to_data_url(time_received_fig)
        time_received_fig.clf()
        result_list.append(
            {
                "title": "Time of Day Received",
                "description": f"Here's the breakdown of when during the day you got your emails in the most recent {last_n_days} days (timezone: {timezone_string})",
                "image_data_url": time_received_fig_url,
            },
        )

    # Sent emails, grouped by weekday.
    weekday_counts = df_emails_sent.date.dt.weekday.value_counts().sort_index()
    weekday_counts.index = [weekday_names[i] for i in weekday_counts.index]
    weekday_sent_fig = donut_plot(
        weekday_counts,
        title="Email Sent by Weekday",
        username=to_email,
        first_day=first_date,
        last_day=last_date,
    )
    if weekday_sent_fig is not None:
        weekday_sent_fig_url = matplotlib_save_to_data_url(weekday_sent_fig)
        weekday_sent_fig.clf()
        result_list.append(
            {
                "title": "Weekday Sent Emails",
                "description": f"Here's the breakdown of when during the week you sent your emails in the most recent {last_n_days} days (timezone: {timezone_string})",
                "image_data_url": weekday_sent_fig_url,
            }
        )

    # Sent emails, grouped into four six-hour blocks of the day.
    hour_block = pd.cut(
        df_emails_sent["date"].dt.hour,
        range(0, 25, 6),
        right=False,
        labels=hour_block_names,
    )
    hour_block_counts = hour_block.value_counts().sort_index()
    time_sent_fig = donut_plot(
        hour_block_counts,
        title="Email Sent by Time",
        username=to_email,
        first_day=first_date,
        last_day=last_date,
        label_fontsize=8,
    )
    if time_sent_fig is not None:
        time_sent_fig_url = matplotlib_save_to_data_url(time_sent_fig)
        time_sent_fig.clf()
        result_list.append(
            {
                "title": "Time of Day Sent",
                "description": f"Here's the breakdown of when during the day you sent your emails in the most recent {last_n_days} days (timezone: {timezone_string})",
                "image_data_url": time_sent_fig_url,
            }
        )

    json.dumps({"result": result_list})
    `,
    { amazonCSV, emailCSV, tzCode }
  );
  return JSON.parse(result).result;
};


export { parseEmailsToTables, analyzeEmailTables, runPythonAsync };
