import json
import itertools
import pandas as pd

from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder


FILENAME = 'tcp_retransmissions.jsonl'


def load_unique_pairs(path):
    """Collect the distinct (src_ip, dst_ip) pairs found in a JSONL file.

    Each non-blank line is parsed as JSON. A pair is recorded when the
    record is an object whose "metric" value is an object holding truthy
    "src_ip" and "dst_ip" entries. Records without such a pair are ignored.
    Lines that are not valid JSON are skipped and counted; the count is
    printed once the file has been read.

    Args:
        path: Path of the JSON-lines file to read (opened as UTF-8).

    Returns:
        A set of (src_ip, dst_ip) tuples.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    pairs = set()
    malformed = 0

    with open(path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # One bad line (e.g. a truncated final record) should not
                # discard every other record in the file.
                malformed += 1
                continue

            # A record that is not an object, or whose "metric" is null or
            # not an object, carries no pair -- same as a missing "metric".
            metric = data.get("metric") if isinstance(data, dict) else None
            if not isinstance(metric, dict):
                continue

            src_ip = metric.get("src_ip")
            dst_ip = metric.get("dst_ip")
            if src_ip and dst_ip:
                pairs.add((src_ip, dst_ip))

    if malformed:
        print(f"Skipped {malformed} malformed JSON line(s) in {path}")
    return pairs


def build_status_table(unique_pairs):
    """Build one row per ordered pair of distinct IPs, labelled OK or FAIL.

    Every IP that appears in ``unique_pairs`` (as source or destination) is
    combined with every other such IP. A row is labelled "FAIL" when its
    (src_ip, dst_ip) is one of ``unique_pairs`` and "OK" otherwise. Pairs
    whose source equals their destination never produce a row because only
    permutations of distinct IPs are generated.

    Args:
        unique_pairs: Iterable of (src_ip, dst_ip) tuples.

    Returns:
        A DataFrame with columns "src_ip", "dst_ip" and "status".
    """
    failed = set(unique_pairs)
    # Sorted so the row order does not depend on set iteration order.
    pod_ips = sorted({ip for pair in failed for ip in pair})
    rows = [
        (src_ip, dst_ip, "FAIL" if (src_ip, dst_ip) in failed else "OK")
        for src_ip, dst_ip in itertools.permutations(pod_ips, 2)
    ]
    return pd.DataFrame(rows, columns=["src_ip", "dst_ip", "status"])


def encode_transactions(table):
    """One-hot encode the rows of ``table`` as transactions.

    Each row is handed to the encoder as the item list
    [src_ip, dst_ip, status], so an IP maps to the same item whether it
    appeared as source or as destination.

    Args:
        table: DataFrame as returned by ``build_status_table``.

    Returns:
        A boolean DataFrame with one column per distinct item.
    """
    encoder = TransactionEncoder()
    encoded = encoder.fit_transform(table.values.tolist())
    return pd.DataFrame(encoded, columns=encoder.columns_)


def mine_rules(transactions, min_support=0.001, min_confidence=0.7):
    """Mine association rules from one-hot encoded transactions.

    Args:
        transactions: Boolean DataFrame as returned by
            ``encode_transactions``.
        min_support: Minimum support passed to ``apriori``.
        min_confidence: Minimum confidence a rule must reach.

    Returns:
        The rules DataFrame produced by ``association_rules``, with the
        "antecedents" and "consequents" columns rendered as comma-separated
        strings.
    """
    frequent_itemsets = apriori(transactions, min_support=min_support,
                                use_colnames=True)
    # num_itemsets is the number of transactions in the input data, not the
    # number of frequent itemsets that apriori found.
    rules = association_rules(frequent_itemsets, num_itemsets=len(transactions),
                              metric="confidence", min_threshold=min_confidence)
    for column in ('antecedents', 'consequents'):
        # Sorted so the rendered text does not depend on frozenset order.
        rules[column] = rules[column].apply(lambda items: ', '.join(sorted(items)))
    return rules


def main():
    """Run the three pipeline steps on FILENAME and print the mined rules."""
    print("Step 1: Data Collection")
    unique_pairs = load_unique_pairs(FILENAME)

    print("Step 2: Preprocessing")
    table = build_status_table(unique_pairs)
    if table.empty:
        # Fewer than two distinct IPs: there are no transactions to mine.
        print("No src/dst IP pairs found; nothing to mine.")
        return
    transactions = encode_transactions(table)

    print("Step 3: Association rule mining")
    rules = mine_rules(transactions)
    print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


if __name__ == "__main__":
    main()
