Astrophysical Data Processing for Stellar Models

This script is designed for loading and processing astrophysical data files, specifically for stellar models, in '.trk' (track files) and '.iso' (isochrone files) formats. It includes functionalities for parsing these files, extracting metadata, handling data via Pandas DataFrames, and saving processed data objects into a pickled format for later use. The program supports command-line arguments for specifying the input file and an optional output file, demonstrating its utility in scientific data analysis and computational astrophysics workflows.

import re
import pandas as pd
import numpy as np
import os
import itertools
import argparse
import pickle
 
def load_iso(path):
    header = ["Age", "Log_T", "Log_g", "Log_L", "Log_R", "Y_core", "Z_core", "(Z/X)_surf", "L_H", "L_He", "M_He_core", "M_CO_core"]
    iso = pd.read_csv(path, delim_whitespace=True, names=header, comment='#')
    with open(path, 'r') as f:
        metadata_str = f.readline()
    metadata_str = metadata_str.strip('#').lstrip().rstrip().split(' ')[:3]
    metadata = dict()
    for i, data_elem in enumerate(metadata_str):
        keyvalue = data_elem.split('=')
        metadata[keyvalue[0]] = float(keyvalue[1])
    return iso, metadata
 
def get_trk_metadata(line):
    mass_search = re.compile('(?<=Total mass =  ).*?(?=\s)')
    mixing_length_search = re.compile('(?<=Mixing length = ).*?(?=\s)')
    EOS_search = re.compile('(?<=EOS = ).*?(?=\s)')
    Atm_search = re.compile('(?<=Atm = ).*?(?=\s)')
    Low_T_opacities_search = re.compile('(?<=Low T opacities = ).*?(?=\s)')
 
    mass = mass_search.search(line)
    mixing_length = mixing_length_search.search(line)
    EOS = EOS_search.search(line)
    Atm = Atm_search.search(line)
    Low_T_opacities = Low_T_opacities_search.search(line)
    meta = {'Mass':float(mass.group()), 'Mixing Length': float(mixing_length.group()),
            'EOS': EOS.group(), 'Atm':Atm.group(), 'Low T Opacities': Low_T_opacities.group()}
    return meta
 
def load_trk(path):
    header = [ "Model_#", "shells", "AGE", "log_L", "log_R", "log_g",
              "log_Teff", "Mconv_core", "Mconv_env", "Rconv_env",
              "M_He_core", "Xenv", "Zenv", "L_ppI", "L_ppII", "L_ppIII",
              "L_CNO", "L_triple-alpha", "L_He-C", "L_gravity", "L_neutrinos_old",
              "L_%_Grav_eng", "L_Itot", "C_log_T", "C_log_RHO", "C_log_P",
              "C_BETA", "C_ETA", "C_X", "C_Z", "C_H" "C_shell_midpoint",
              "C_H_shell_mass", "C_T_at_base_of_cz", "C_rho_at_base_of_cz","CA_He3",
              "CA_C12", "CA_C13", "CA_N14", "CA_N15", "CA_O16",
              "CA_O17", "CA_O18", "SA_He3", "SA_C12", "SA_C13", "SA_N14",
              "SA_N15", "SA_O16", "SA_O17","SA_O18", "N_pp", "N_pep", "N_hep",
              "N_Be7", "N_B8", "N_N13", "N_O15", "N_F17", "Cl37_flux",
              "Ga71_flux"]
    trk = pd.DataFrame(columns=header)
    with open(path, 'r') as trk_file:
        all_lines = trk_file.readlines()
    metadata = get_trk_metadata(all_lines[4])
    lines = [x.lstrip().rstrip().split() for x in all_lines[14:]]
    merged_lines = [x+y+z+w+q for x, y, z, w, q in zip(lines[::6], lines[1::6], lines[2::6], lines[3::6], lines[4::6], lines[5::6])]
    numeric_lines = [[float(y) for y in x] for x in merged_lines]
    for index, row in enumerate(numeric_lines):
        trk.loc[index] = row
    return trk, metadata
 
def load_trk_models(path):
    trks = list()
    metas = list()
    for file in os.listdir(path):
        if file.endswith('.trk'):
            print(file)
            trk, meta = load_trk(os.path.join(path, file))
            trks.append(trk)
            metas.append(meta)
    trks, metas = sort_based_on_key(trks, metas, key='Mass')
    return trks, metas
 
def load_iso_models(path):
    isos = list()
    metas = list()
    for file in os.listdir(path):
        if file.endswith('.iso'):
            iso, metadata = load_iso(os.path.join(path, file))
            isos.append(iso)
            metas.append(metadata)
    isos, metas = sort_based_on_key(isos, metas, key='M')
    return isos, metas
 
def load(path):
    functions = {'track': load_trk, 'iso': load_iso}
    suffix = args.path.split('.')[-1]
    data, metadata = functions[suffix](args.path)
    return data, metadata
 
 
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Load data from a dsep3 trk or iso file")
    parser.add_argument("path", help="path to file", type=str)
    parser.add_argument("-o", "--output", help="path to save as pickle", type=str)
 
    args = parser.parse_args()
 
    data, metadata = load(args.path)
 
    if args.output:
        data_package = {"data": data, "metadata": metadata}
        pickle.dump(data_package, open(args.output, "wb"))
    else:
        print("========= METADATA ===========")
        for key in metadata:
            print(f"{key}: {metadata[key]}")
 
        print("=========== DATA =============")
        for index, row in data.iterrows():
            print(row)