# written by Stefan Fischer, Tübingen Structural Microscopy Core Facility (TSM)
#Version_1_0, July 2025

from ij import IJ
from ij.io import DirectoryChooser           
import os, codecs, csv
from loci.formats import ImageReader
from loci.formats import MetadataTools


# Tell the user what the script produces before asking for any input.
# The file names listed here must match the names actually written further
# down in the script ("complete_metadata.csv" and "metadata_filtered.csv").
IJ.showMessage(
    "Crossbeam 550 Image Metadata Extractor",
    "This script extracts metadata from a folder of \n"
    "Crossbeam 550 images and writes two CSV files:\n\n"
    " 1) complete_metadata.csv  (all metadata)\n"
    " 2) metadata_filtered.csv  (selected columns)\n\n"
    "In the next step, please choose the folder that contains\n"
    "your images."
)

# Ask for the folder holding the images; both CSV files are written into it.
dc = DirectoryChooser("Select the image folder")
directory_path = dc.getDirectory()
if directory_path is None:
    # No folder was returned by the chooser: nothing to process, stop here.
    IJ.showMessage("No folder selected - script aborted.")
    raise SystemExit
    

# Function to safely convert to unicode and handle errors
def safe_unicode(val):
    """Return ``val`` as a unicode string without ever raising.

    A value that already is ``unicode`` is handed back untouched.  Anything
    else is turned into a byte string with ``str()`` and decoded as UTF-8;
    bytes that are not valid UTF-8 become the replacement character U+FFFD.
    If the conversion fails for any reason the error is printed and an
    empty unicode string is returned instead.
    """
    try:
        needs_decoding = not isinstance(val, unicode)
        if needs_decoding:
            raw = str(val)
            # 'replace' keeps going on bad bytes and marks them with U+FFFD
            return unicode(raw, 'utf-8', errors='replace')
        return val
    except Exception as err:
        print("Error converting value to unicode: {}".format(err))
        return u''

# Function to handle replacement of problematic characters based on following character
def replace_invalid_characters(text):
    """Substitute every U+FFFD replacement character in ``text``.

    The substitute depends on the character that follows the marker:

    * followed by ``m``/``M`` or ``A``  -> ``u``
      (presumably a lost micro sign, e.g. "um", "uA" - not verifiable here)
    * followed by ``C``                 -> dropped entirely
      (presumably a lost degree sign - not verifiable here)
    * followed by anything else, or at the very end of the text -> ``?``

    Values that are not ``unicode`` are returned unchanged.
    """
    if not isinstance(text, unicode):
        return text

    marker = u'\ufffd'
    final_pos = len(text) - 1
    pieces = []
    for pos, char in enumerate(text):
        if char != marker:
            pieces.append(char)
            continue

        # Look one character ahead; None means the marker is the last char.
        follower = text[pos + 1] if pos < final_pos else None
        if follower is None:
            pieces.append(u'?')
        elif follower.lower() == u'm' or follower == u'A':
            pieces.append(u'u')
        elif follower == u'C':
            # marker is removed without a substitute
            continue
        else:
            pieces.append(u'?')
    return u''.join(pieces)

# Initialize list to hold all rows of data (one dict per image file)
rows = []

# Initialize a set to hold all keys for headers
all_keys = set()

# Lower-case file extensions that are handed to Bio-Formats
image_extensions = ('.tiff', '.tif', '.dm3', '.czi')


def _collect_metadata(table, label, target, key_registry):
    """Copy every entry of one metadata table into ``target``.

    table        -- object offering keySet() and get(key), as returned by
                    reader.getSeriesMetadata() / reader.getGlobalMetadata()
    label        -- 'series' or 'global'; only used in the error message
    target       -- dict that receives the cleaned key/value pairs
    key_registry -- set that records every cleaned key for the CSV header

    Keys and values are converted with safe_unicode() and cleaned with
    replace_invalid_characters().  A failure on one entry is printed and
    the remaining entries are still processed.
    """
    for key in table.keySet():
        try:
            key_str = replace_invalid_characters(safe_unicode(key))
            value_str = replace_invalid_characters(safe_unicode(table.get(key)))
            target[key_str] = value_str
            key_registry.add(key_str)
        except Exception as e:
            print("Error processing {} metadata for key {}: {}".format(label, key, e))


# Iterate over all files in the selected directory
for filename in os.listdir(directory_path):
    if not filename.lower().endswith(image_extensions):
        continue
    file_path = os.path.join(directory_path, filename)

    # Read metadata using Bio-Formats library.  The reader is closed in a
    # finally clause so the file handle is released even when setId() or
    # one of the metadata getters raises.
    reader = ImageReader()
    try:
        omeMeta = MetadataTools.createOMEXMLMetadata()
        reader.setMetadataStore(omeMeta)
        reader.setId(file_path)
        reader.setSeries(0)  # only the first series is evaluated
        seriesMetadata = reader.getSeriesMetadata()
        globalMetadata = reader.getGlobalMetadata()
    finally:
        reader.close()

    # Collect metadata; series entries first, so a global entry with the
    # same key overwrites the series value.
    metadata_dict = {'Filename': filename}
    _collect_metadata(seriesMetadata, 'series', metadata_dict, all_keys)
    _collect_metadata(globalMetadata, 'global', metadata_dict, all_keys)

    # Append metadata dict to rows
    rows.append(metadata_dict)

# Sort rows alphabetically by filename
rows.sort(key=lambda x: x['Filename'])

# Create a CSV file for all images, inside the selected image folder
csv_filename = "complete_metadata.csv"
csv_path = os.path.join(directory_path, csv_filename)

# Sort the metadata keys once; the same column order is used for the header
# and for every data row, so there is no need to re-sort per row.
sorted_keys = sorted(all_keys)

# Write all data to CSV
with codecs.open(csv_path, mode='w', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    # Write header with 'Filename' as the first column
    header = ['Filename'] + sorted_keys
    writer.writerow(header)

    # Write rows with filename as the first column; a key that an image
    # does not provide yields an empty cell.
    for row in rows:
        row_data = [row.get('Filename', '')] + [row.get(key, '') for key in sorted_keys]
        writer.writerow(row_data)

print("Complete metadata saved to " + csv_filename)

# The filtered CSV is written next to the complete one
filtered_filename = "metadata_filtered.csv"
filtered_path = os.path.join(directory_path, filtered_filename)

# Columns to keep, in the order they appear in the filtered file
wanted_cols = [
    "Filename", "Width", "Height", "Store resolution", "Image Pixel Size",
    "Detector", "WD", "EHT", "I Probe", "Cycle Time",
    "Dwell Time", "Noise Reduction", "Line Avg.Count"
]

# Re-read the complete CSV written above and copy only the wanted columns
with codecs.open(csv_path, mode='r', encoding='utf-8', errors='replace') as infile:
    reader = csv.reader(infile)
    header = next(reader)

    # Positions of the wanted columns that exist in the complete CSV,
    # kept in the order given by wanted_cols
    col_idx = [header.index(col) for col in wanted_cols if col in header]

    # Warn if columns are missing.  The message is built as ONE string:
    # without print_function, print("a", "b") prints a tuple repr.
    missing = [col for col in wanted_cols if col not in header]
    if missing:
        print("in the CSV the following columns are missing: " + ", ".join(missing))

    # Write the filtered CSV
    with codecs.open(filtered_path, mode='w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([header[i] for i in col_idx])          # header
        for row in reader:
            # a row shorter than the header yields empty cells
            writer.writerow([row[i] if i < len(row) else ''
                             for i in col_idx])

print("Filtered metadata saved to " + filtered_filename)
