import os
import json
from rdflib import Graph
import pandas as pd

# Input/output locations for the CSV -> TTL conversion pipeline.
CSV_DIR = "csvs"
OUTPUT_DIR = "ttls"
TEMPLATE = "mapping_template.ttl"
METADATA = "refdataproject_production.json"

# Load the per-specimen metadata records (a JSON list of dicts).
# Encoding is pinned to UTF-8 so parsing does not depend on the locale default.
with open(METADATA, "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Index the JSON list by tensile specimen id (which equals the CSV file stem)
# so each CSV file can look up its metadata record in O(1).
metadata_lookup = {entry["tensile_specimen_id"]: entry for entry in metadata}

# Load the TTL template containing {placeholder} tokens to be filled per CSV.
with open(TEMPLATE, "r", encoding="utf-8") as f:
    template = f.read()

# Mapping from {placeholder} names used in the TTL template to the
# corresponding metadata field names found in the CSV header section.
# NOTE: "seperation" reproduces the spelling used in the CSV files.
csv_to_ttl = dict(
    sotep="Slope of linear-elastic region",
    ppeamf="Uniform elongation",
    ts="Ultimate tensile strength",
    pspe="Yield stress at 1% plastic strain",
    rp02="Yield stress at 0.2% plastic strain",
    parallel_length="Parallel length",
    og_gauge_length="Gauge length",
    cross_section_area="Original cross-section",
    og_diameter="Gauge diameter",
    batch="Heat treatment batch",
    strain_rate="Strain rate",
    crosshead_separation_rate="Crosshead seperation speed",
    date="Date",
    machine_type="Machine type",
)

# Placeholder names filled from the JSON metadata record rather than the CSV.
json_vars = ["blank_id_iwt", "blank_id", "tensile_specimen_id", "aust_temp"]

# Ensure the output directory exists before writing any TTL files
# (os.listdir on CSV_DIR would otherwise succeed but every write would fail).
os.makedirs(OUTPUT_DIR, exist_ok=True)

for file_name in os.listdir(CSV_DIR):
    if not file_name.endswith(".csv"):
        continue

    csv_path = os.path.join(CSV_DIR, file_name)
    print(f"Processing {csv_path}...")

    # The specimen id is the filename without its extension. splitext (unlike
    # str.replace(".csv", ...)) only strips the trailing extension, so a name
    # containing ".csv" elsewhere cannot be mangled.
    specimen_id = os.path.splitext(file_name)[0]

    # Parse the tab-separated "key<TAB>value" metadata header of the CSV.
    csv_meta_dict = {}
    with open(csv_path, "r", encoding="utf-8") as f:
        for _ in range(23):  # only read first 23 lines (the metadata)
            line = f.readline().strip()
            if not line:
                continue
            parts = line.split("\t")
            if len(parts) >= 2:
                key = parts[0].strip()
                value = parts[1].strip()
                csv_meta_dict[key] = value

    # Resolve each TTL placeholder to its CSV metadata value (None if absent).
    mapped_values = {}
    for ttl_key, csv_key in csv_to_ttl.items():
        # some csv keys may have a colon at the end
        lookup_key = csv_key if csv_key in csv_meta_dict else csv_key + ":"

        if lookup_key in csv_meta_dict:
            value = csv_meta_dict[lookup_key]

            # keep only the date part, dropping any trailing time component
            if ttl_key == "date" and value:
                value = value.split(" ")[0]

            # replace space with underscore so machine_type is IRI-safe
            if ttl_key == "machine_type" and value:
                value = value.replace(" ", "_")
            mapped_values[ttl_key] = value
        else:
            mapped_values[ttl_key] = None

    # Substitute the CSV-derived values into the template.
    ttl_filled = template
    for key, value in mapped_values.items():
        ttl_filled = ttl_filled.replace(f"{{{key}}}", str(value))

    # Substitute the JSON metadata values (empty string when missing).
    json_entry = metadata_lookup.get(specimen_id, {})
    for json_key in json_vars:
        ttl_filled = ttl_filled.replace(f"{{{json_key}}}", str(json_entry.get(json_key, "")))

    # Write the filled template to the corresponding .ttl file.
    ttl_file_path = os.path.join(OUTPUT_DIR, specimen_id + ".ttl")
    with open(ttl_file_path, "w", encoding="utf-8") as f:
        f.write(ttl_filled)

    print(f"Written TTL file: {ttl_file_path}")

# Merge all generated TTL files into one rdflib graph. A Graph is a set of
# triples, so triples repeated across files are deduplicated automatically.

MERGED_FILE = "merged.ttl"
merged_graph = Graph()

ttl_names = [name for name in os.listdir(OUTPUT_DIR) if name.endswith(".ttl")]
for name in ttl_names:
    merged_graph.parse(os.path.join(OUTPUT_DIR, name), format="ttl")

# serializing writes each (deduplicated) triple exactly once
merged_graph.serialize(MERGED_FILE, format="ttl")

print(f"Merged TTL saved to {MERGED_FILE}")