#!/bin/bash

# Strict mode: abort on a failing command (errexit), on expansion of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# Run with DEBUG set to a positive integer to trace every executed command.
DEBUG="${DEBUG:-0}"
if (( DEBUG > 0 )); then
    set -o xtrace
fi

# begin configuration

# Default configuration for nodes.
# NODE_COUNT, MEMORY, CPU and DISK can be overridden with -n, -m, -c and -d.
NODE_COUNT="2"   # Total VMs: one control plane plus NODE_COUNT-1 workers
OS="lts" # Ubuntu LTS
MEMORY="2G"
CPU="2"
DISK="20G"
CLOUD_INIT="cloud-init.yaml"   # Must exist unless running in delete mode

# Cluster
CP_NAME="control-plane"   # VM name of the control-plane node
WORKER_NAME="worker"      # Worker VMs are named <WORKER_NAME><index>
CALICO_VERSION="v3.26.0"
CILIUM_VERSION="1.14.0"
CERT_MANAGER_VERSION="v1.8.0"
KUBECONFIG="kubeconfig.yaml"
# KUBECONFIG is also the environment variable kubectl and helm consult to
# locate their configuration. If the caller exported it, the assignment above
# would silently point those tools at "kubeconfig.yaml" instead of
# ~/.kube/config, which is where this script installs the cluster credentials.
# Drop the export attribute so the value stays private to this script.
export -n KUBECONFIG

# CNI networking
NETWORK="calico" # Options: calico, cilium, none (override with -t)
POD_CIDR="10.244.0.0/16"

# end configuration

# Flags
DELETE_MODE=0   # Set to 1 by -D: delete the cluster instead of creating it


# Write a timestamped [INFO] line to stdout. The arguments are joined with
# spaces and backslash escapes inside them (e.g. \n) are expanded.
log() {
    printf '%b\n' "$(date '+%Y-%m-%d %H:%M:%S') [INFO] $*"
}
# Write a timestamped [ERROR] line to stderr, then terminate the current
# (sub)shell with status 1. The arguments are joined with spaces and backslash
# escapes inside them (e.g. \n) are expanded.
err() {
    printf '%b\n' "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] $*" >&2
    exit 1
}

# Verify that everything the selected mode needs is available.
# Globals:  DELETE_MODE, NETWORK, CLOUD_INIT (all read)
# Exits (via err) when a required command or the cloud-init file is missing.
#
# Delete mode only drives multipass, so kubectl/helm are not demanded there;
# otherwise a machine without kubectl could never tear a cluster down.
check_dependencies() {
    local -a required=("multipass")
    local tool

    if [[ "$DELETE_MODE" -eq 0 ]]; then
        required+=("kubectl")
        if [[ "$NETWORK" == "cilium" ]]; then
            # Cilium is installed through its Helm chart.
            required+=("helm")
        fi
    fi

    for tool in "${required[@]}"; do
        if ! command -v "$tool" &> /dev/null; then
            err "$tool could not be found. Please install it."
        fi
    done

    # The cloud-init file is only consumed when launching VMs.
    if [[ "$DELETE_MODE" -eq 0 && ! -f "$CLOUD_INIT" ]]; then
        err "Cloud-init configuration file not found at: $CLOUD_INIT"
    fi
}

# Print the command-line help to stdout and exit with status 0.
usage() {
    # One printf call; every argument becomes one output line.
    printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "" \
        "Options:" \
        "  -n <number>   Number of nodes total (default: 2)" \
        "  -c <cpu>      CPU cores per node (default: 2)" \
        "  -m <memory>   Memory per node (default: 2G)" \
        "  -d <disk>     Disk size per node (default: 20G)" \
        "  -t <network>  Network type: calico, cilium, none (default: calico)" \
        "  -D            Delete/Destroy the existing cluster nodes" \
        "  -h            Display this help message" \
        "" \
        "Example:" \
        "  $0 -n 3 -c 4 -m 4G"
    exit 0
}

# Run a shell command inside a VM, retrying until it succeeds.
# Arguments:
#   $1 - VM name
#   $2 - command line, executed in the VM via `bash -c`
# Outputs:  the command's stdout is passed through unchanged.
# Returns:  0 as soon as one attempt succeeds; otherwise exits via err after
#           20 attempts spaced 5 seconds apart.
#
# stderr of each attempt is kept out of the console (failed attempts are
# expected while a VM boots), but the output of the last attempt is appended
# to the final error so a real failure can be diagnosed.
run_command() {
    local vm_name=$1
    local cmd=$2
    local max_retries=20
    local count=0
    local last_error=""

    while (( count < max_retries )); do
        # fd 3 carries the real stdout so that the command substitution
        # captures stderr only. Testing the status in an `if` keeps errexit
        # from firing without toggling `set -e` off and on.
        if { last_error=$(multipass exec "$vm_name" -- bash -c "$cmd" \
                2>&1 >&3 3>&-); } 3>&1; then
            return 0
        fi

        # The first failure stays silent; later ones report progress.
        if (( count > 0 )); then
            log "Waiting for $vm_name to be responsive..." \
                "(Attempt $((count + 1))/$max_retries)"
        fi
        sleep 5
        count=$((count + 1))
    done

    local details="\nCommand: $cmd"
    if [[ -n "$last_error" ]]; then
        details+="\nLast error output: $last_error"
    fi
    err "Command failed on $vm_name after $max_retries attempts." \
        "The VM might be hung." \
        "$details"
}

# begin launch_vm
# Create the Multipass VM named $1 unless an instance with that name already
# exists. Sizing, image and cloud-init file come from the CPU, MEMORY, DISK,
# OS and CLOUD_INIT globals.
launch_vm() {
    local vm_name=$1
    local -a launch_args=(
        -n "$vm_name"
        --cloud-init="$CLOUD_INIT"
        -c "$CPU"
        -m "$MEMORY"
        --disk "$DISK"
        "$OS"
    )

    log "Launching VM: $vm_name ($CPU cores, $MEMORY RAM, $DISK Disk)."

    # `multipass info` succeeds only for an existing instance.
    if ! multipass info "$vm_name" &>/dev/null; then
        multipass launch "${launch_args[@]}"
        return
    fi

    log "VM $vm_name already exists. Skipping launch."
}

# Block until the VM named $1 accepts commands, cloud-init has finished and
# the containerd socket exists. Each probe is run through run_command.
wait_for_vm_ready() {
    local vm_name=$1
    # notices[i] is logged right before probes[i] is executed in the VM.
    local -a notices=(
        "Waiting for SSH on $vm_name..."
        "Waiting for cloud-init on $vm_name..."
        "Verifying Containerd on $vm_name..."
    )
    local -a probes=(
        "echo SSH_READY"
        "cloud-init status --wait"
        "ls /var/run/containerd/containerd.sock"
    )
    local step

    for step in "${!probes[@]}"; do
        log "${notices[step]}"
        run_command "$vm_name" "${probes[step]}"
    done

    log "VM $vm_name is ready."
}
# end launch_vm

# begin initialize_control_plane
# Bootstrap Kubernetes on the control-plane VM and make it reachable from the
# host.
# Globals:  CP_NAME, POD_CIDR, CERT_MANAGER_VERSION, HOME (all read)
# Steps:
#   1. `kubeadm init` on the control plane with the configured pod CIDR.
#   2. Install admin.conf as the ubuntu user's kubeconfig inside the VM.
#   3. Copy that kubeconfig to ~/.kube/config on the host (the previous file,
#      if any, is kept as a timestamped backup).
#   4. Wait (bounded) until kubectl can reach the API server.
#   5. Apply the cert-manager manifest.
# Exits via err if the kubeconfig cannot be copied or the API server does not
# become reachable.
initialize_control_plane() {
    local kube_dir="$HOME/.kube"
    local kubeconfig="$kube_dir/config"
    local staged_config backup
    local api_attempts=0
    local max_api_attempts=150   # 150 polls, 2 seconds apart = 5 minutes

    log "Initializing Kubernetes on Control Plane."
    run_command "$CP_NAME" "sudo kubeadm init --pod-network-cidr=$POD_CIDR"

    run_command "$CP_NAME" \
        "mkdir -p /home/ubuntu/.kube && \
        sudo cp /etc/kubernetes/admin.conf /home/ubuntu/.kube/config && \
        sudo chown ubuntu:ubuntu /home/ubuntu/.kube/config"

    log "Transferring kubeconfig to local host."
    mkdir -p "$kube_dir"
    # Stage next to the destination instead of ./config: nothing in the
    # current directory is clobbered, mktemp creates the file owner-only, and
    # the final mv is a same-directory rename. The redirection is performed by
    # this shell, so it does not depend on which paths multipass can write.
    staged_config=$(mktemp "$kube_dir/config.XXXXXX")
    if ! multipass exec "$CP_NAME" -- cat /home/ubuntu/.kube/config \
            < /dev/null > "$staged_config"; then
        rm -f -- "$staged_config"
        err "Failed to copy kubeconfig from $CP_NAME."
    fi
    if [[ -f "$kubeconfig" ]]; then
        # Never silently destroy credentials for other clusters.
        backup="${kubeconfig}.bak.$(date '+%Y%m%d%H%M%S')"
        cp -p -- "$kubeconfig" "$backup"
        log "Existing kubeconfig backed up to $backup"
    fi
    mv -- "$staged_config" "$kubeconfig"
    log "Cluster access configured at ~/.kube/config"

    until kubectl cluster-info &>/dev/null; do
        api_attempts=$((api_attempts + 1))
        if (( api_attempts >= max_api_attempts )); then
            err "Kubernetes API Server not reachable after" \
                "$max_api_attempts attempts."
        fi
        log "Waiting for local access to Kubernetes API Server..."
        sleep 2
    done

    log "Installing cert-manager ($CERT_MANAGER_VERSION)..."
    local cert_url="https://github.com/cert-manager/cert-manager/releases/download"
    kubectl apply -f "${cert_url}/${CERT_MANAGER_VERSION}/cert-manager.yaml"
}
# end initialize_control_plane

# begin setup_network
# Install the CNI plugin selected by NETWORK.
# Globals:  NETWORK, CALICO_VERSION, CILIUM_VERSION (all read)
#   calico - apply the upstream calico.yaml manifest for CALICO_VERSION
#   cilium - install or upgrade the Cilium Helm chart at CILIUM_VERSION with
#            Prometheus metrics and Hubble (relay + UI) enabled
#   none   - skip CNI installation
# Any other value exits via err.
setup_network() {
    log "Setting up CNI: $NETWORK"
    case "$NETWORK" in
        calico)
            local calico_url="https://raw.githubusercontent.com/projectcalico/calico"
            local manifest_path="${CALICO_VERSION}/manifests/calico.yaml"
            kubectl apply -f "${calico_url}/${manifest_path}"
            ;;
        cilium)
            helm repo add cilium https://helm.cilium.io/
            helm repo update
            # `upgrade --install` instead of `install`: a plain install
            # aborts when the release already exists, which would break a
            # second run of this script against the same cluster.
            helm upgrade --install cilium cilium/cilium \
                --version "${CILIUM_VERSION}" \
                --namespace kube-system \
                --set prometheus.enabled=true \
                --set hubble.enabled=true \
                --set hubble.metrics.enableOpenMetrics=true \
                --set hubble.relay.enabled=true \
                --set hubble.ui.enabled=true
            ;;
        none)
            log "Skipping CNI installation."
            ;;
        *)
            err "Unknown network type: $NETWORK"
            ;;
    esac
}
# end setup_network

# begin join_workers
# Join every worker VM to the cluster and label it as a worker.
# Globals:  NODE_COUNT, CP_NAME, WORKER_NAME, HOME (all read)
# Workers are named <WORKER_NAME>0 .. <WORKER_NAME>(NODE_COUNT-2).
# Returns 0 immediately when NODE_COUNT leaves no workers; exits via err when
# a joined node does not show up in the API in time.
join_workers() {
    # Decide first whether there is anything to do, so that a single-node
    # cluster does not mint an unused join token.
    local worker_count=$((NODE_COUNT - 1))
    if (( worker_count <= 0 )); then
        log "No worker nodes to join."
        return 0
    fi

    log "Generating join token from $CP_NAME..."
    local join_command
    join_command=$(multipass exec "$CP_NAME" -- \
        sudo kubeadm token create --print-join-command)

    local i node_name attempts
    local max_attempts=30   # 30 polls, 2 seconds apart = 1 minute per node
    for ((i = 0; i < worker_count; i++)); do
        node_name="${WORKER_NAME}${i}"
        log "Joining $node_name to cluster..."
        run_command "$node_name" "sudo ${join_command}"

        log "Configuring kubectl access on $node_name..."
        run_command "$node_name" "mkdir -p /home/ubuntu/.kube"
        # When Multipass is installed as a Snap package, it is sandboxed and
        # many host directories are isolated from it, so the file is streamed
        # over stdin (opened by this shell) rather than transferred by path.
        multipass exec "$node_name" -- \
            bash -c "cat > /home/ubuntu/.kube/config" < "$HOME/.kube/config"
        run_command "$node_name" "sudo chown ubuntu:ubuntu /home/ubuntu/.kube/config"

        # The Node object appears only once the kubelet has registered;
        # labelling before that fails, so poll instead of a fixed sleep.
        attempts=0
        until kubectl get node "$node_name" &>/dev/null; do
            attempts=$((attempts + 1))
            if (( attempts >= max_attempts )); then
                err "Node $node_name did not register with the cluster" \
                    "after $max_attempts checks."
            fi
            sleep 2
        done
        kubectl label nodes "$node_name" \
            node-role.kubernetes.io/worker=worker --overwrite
    done
}
# end join_workers

# begin delete_cluster
# Delete and purge the VMs that belong to this cluster, then remove the local
# $KUBECONFIG file if it exists.
# Globals:  CP_NAME, WORKER_NAME, KUBECONFIG (all read)
# A VM belongs to the cluster when its name is exactly CP_NAME or WORKER_NAME
# followed by digits - the names this script generates. Matching by bare
# prefix would also purge unrelated instances such as "worker-db".
delete_cluster() {
    log "Scanning for cluster nodes..."

    local listing
    listing=$(multipass list)

    # Filtering with awk (which exits 0 on no match) keeps the "nothing to
    # delete" case from aborting the script, unlike a grep stage under
    # `set -o pipefail` + `set -e`.
    local -a nodes=()
    local name
    while IFS= read -r name; do
        nodes+=("$name")
    done < <(awk -v cp="$CP_NAME" -v worker="$WORKER_NAME" \
        '$1 == cp || $1 ~ ("^" worker "[0-9]+$") { print $1 }' <<< "$listing")

    if (( ${#nodes[@]} == 0 )); then
        log "No cluster nodes matching '$CP_NAME' or '$WORKER_NAME'."
    else
        log "Deleting nodes: ${nodes[*]}"
        multipass delete --purge "${nodes[@]}"
        log "Nodes deleted and purged."
    fi

    if [[ -f "$KUBECONFIG" ]]; then
        rm -- "$KUBECONFIG"
        log "Removed local $KUBECONFIG file."
    fi

    log "Cluster deleted."
}
# end delete_cluster


# Parse the command-line options into the configuration globals.
# Globals:  NODE_COUNT, MEMORY, CPU, DISK, NETWORK, DELETE_MODE (written)
# -h prints the help text and exits 0. A missing option argument, an unknown
# option or an invalid node count exits via err with a non-zero status, so a
# mistyped invocation cannot be mistaken for a successful one.
parse_args() {
    local opt
    local OPTIND=1 OPTARG=""

    while getopts ":n:m:c:d:t:Dh" opt; do
        case "$opt" in
            n)
                # The count feeds arithmetic later on: accept only positive
                # decimal integers (no leading zero, which would be read as
                # octal).
                [[ "$OPTARG" =~ ^[1-9][0-9]*$ ]] \
                    || err "Option -n expects a positive integer, got '$OPTARG'."
                NODE_COUNT="$OPTARG"
                ;;
            m) MEMORY="$OPTARG" ;;
            c) CPU="$OPTARG" ;;
            d) DISK="$OPTARG" ;;
            t) NETWORK="$OPTARG" ;;
            D) DELETE_MODE=1 ;;
            h) usage ;;
            :) err "Option -$OPTARG requires an argument. Run '$0 -h' for help." ;;
            *) err "Unknown option: -$OPTARG. Run '$0 -h' for help." ;;
        esac
    done
}

parse_args "$@"

# begin main
# Run function $1 once per remaining argument, all in parallel, and wait for
# every job. Exits via err if any job finished with a non-zero status.
_run_on_all_nodes() {
    local action=$1
    shift
    (( $# > 0 )) || return 0

    local node pid
    local failed=0
    local -a pids=()

    for node in "$@"; do
        "$action" "$node" &
        pids+=("$!")
    done

    # A bare `wait` always reports success; waiting on each PID is the only
    # way to learn that a background job failed.
    for pid in "${pids[@]}"; do
        wait "$pid" || failed=1
    done

    if (( failed )); then
        err "$action failed on one or more nodes."
    fi
}

# Entry point: delete the cluster when DELETE_MODE is 1, otherwise create it.
# Globals:  DELETE_MODE, NODE_COUNT, CP_NAME, WORKER_NAME (read);
#           ALL_NODES (written)
# Creation launches all VMs in parallel, waits for all of them to be ready,
# then initializes the control plane, installs the CNI and joins the workers.
# The sequence stops at the first stage in which any node fails.
main() {
    check_dependencies

    if [[ $DELETE_MODE -eq 1 ]]; then
        delete_cluster
        exit 0
    fi

    log "Creating cluster ($NODE_COUNT nodes)"

    # Control plane first, then workers numbered from 0.
    ALL_NODES=("$CP_NAME")
    local i
    for ((i = 0; i < NODE_COUNT - 1; i++)); do
        ALL_NODES+=("${WORKER_NAME}${i}")
    done

    _run_on_all_nodes launch_vm "${ALL_NODES[@]}"
    _run_on_all_nodes wait_for_vm_ready "${ALL_NODES[@]}"

    initialize_control_plane
    setup_network
    join_workers

    log "Cluster created."
    log "Run 'kubectl get nodes' to verify."
}
# end main

# Start the workflow. The options have already been consumed by the getopts
# loop above; the arguments are forwarded unchanged and main does not read
# them.
main "$@"