File Organization and Data Parsing for Stellar Population Analysis

This Python script organizes astrophysical simulation output (track history files, `.hist`) into directories named after the population and metallicity tags found in each file name. It uses regular expressions to group the input files, creates one directory per population/metallicity combination, and copies the files into it under a standardized naming convention. It also parses abundance files to extract chemical composition ratios and writes them into template input files for further simulation processing. Existing non-empty directories are skipped unless the force option is given, in which case they are replaced. File paths and the force option are supplied via command-line arguments.

import os
import shutil
from pathlib import Path
import argparse
import re
 
from pysep.opac.tops.parseChemFile import open_and_parse
 
def _prepare_output_dir(dirName, force):
    """Create an empty ``dirName``; return False when it must be left alone.

    An existing directory is replaced when it is empty or when ``force`` is
    set. A non-empty directory without ``force`` is reported and kept.
    """
    if os.path.exists(dirName):
        if os.listdir(dirName) and not force:
            print(f"{dirName} exists and is not empty! skipping!")
            return False
        shutil.rmtree(dirName)
    os.mkdir(dirName)
    return True


def _write_isoin(templatePath, outPath, dirName, ratios, trackNames):
    """Fill in a ``.isoin`` template and write the result to ``outPath``.

    The template is addressed by zero-based line index: 1 receives the
    directory name, 3 the composition line, 5-7 the absolute paths of the
    directory and its ``eep``/``iso`` subdirectories, 11 the number of track
    files, and the lines from 12 onwards the track file names.
    """
    with open(templatePath, 'r') as f:
        contents = f.read().split('\n')

    contents[1] = dirName
    contents[3] = (
        f"   {ratios['Y']:0.2f}  "
        f"{ratios['Z']:0.4E}   "
        f"{ratios['[Fe/H]']:0.2f}    "
        f"{ratios['[alpha/Fe]']:0.2f}"
        "      0.0"
    )
    contents[5] = os.path.abspath(dirName)
    contents[6] = os.path.abspath(os.path.join(dirName, "eep"))
    contents[7] = os.path.abspath(os.path.join(dirName, "iso"))

    # Index 11 says how many track names the template lists; swap exactly
    # that range for the new names. Replacing by position (rather than by
    # matching line text) keeps unrelated lines that happen to be equal to a
    # template track entry.
    templateCount = int(contents[11])
    contents[11] = str(len(trackNames))
    contents[12:12 + templateCount] = trackNames

    with open(outPath, 'w') as f:
        f.write('\n'.join(contents))


def main(files, abun, force=False):
    """Sort track files into per-population directories and set up inputs.

    For every combination of population letter (``pop<X>``) and ``0.<n>Y``
    tag found in ``files``, a directory ``pop<X>Y0<n>`` is created in the
    current working directory. Matching track files are copied into it as
    ``DSEPTrk0<mass>M.hist``, ``eep`` and ``iso`` subdirectories are added,
    and every file in ``./template`` whose name contains ``input`` is copied
    over. ``.isoin`` templates are rewritten with the directory paths, the
    parsed abundance ratios and the mass-ordered list of copied tracks.

    Args:
        files: Paths of the track files. Each name must contain
            ``pop<X>``, ``0.<n>Y`` and ``DSEPTrk0.<n>M``.
        abun: Directory holding the abundance files; the file used for a
            combination is the first whose name contains ``pop<X>`` and
            ``Y+0.<n>``.
        force: Replace output directories that already exist and are not
            empty instead of skipping them.

    Raises:
        FileNotFoundError: If ``./template`` does not exist or no abundance
            file matches a combination being processed.
        IndexError: If a file name lacks one of the expected tags.
    """
    # Sorted so that directories are processed in a reproducible order.
    populations = sorted({re.findall(r'pop([A-Z])', x)[0] for x in files})
    metals = sorted({re.findall(r'(0\.\d+)Y', x)[0] for x in files})
    abunFiles = os.listdir(abun)

    for pop in populations:
        for metal in metals:
            dirName = f'pop{pop}Y{metal.replace(".", "")}'
            if not _prepare_output_dir(dirName, force):
                # Skip only this directory; the remaining metallicities of
                # the same population must still be processed.
                continue

            tracks = []
            for file in files:
                if f'pop{pop}' not in file or f'{metal}Y' not in file:
                    continue
                fileMass = re.findall(r'DSEPTrk(0\.\d+)M', file)[0]
                newFileName = f'DSEPTrk{fileMass.replace(".", "")}M.hist'
                tracks.append((float(fileMass), newFileName))
                shutil.copy(file, os.path.join(dirName, newFileName))
            os.mkdir(os.path.join(dirName, "eep"))
            os.mkdir(os.path.join(dirName, "iso"))

            # Order the track names by mass (stable for equal masses).
            tracks.sort(key=lambda track: track[0])
            newFileNames = [name for _, name in tracks]

            if not os.path.exists('template'):
                raise FileNotFoundError(
                    "'template' directory not found in the current "
                    "working directory"
                )

            popFile = next(
                (x for x in abunFiles
                 if f"pop{pop}" in x and f"Y+{metal}" in x),
                None,
            )
            if popFile is None:
                raise FileNotFoundError(
                    f"no abundance file matching pop{pop} and Y+{metal} "
                    f"in {abun}"
                )
            parsed = open_and_parse(os.path.join(abun, popFile))

            for iFile in os.listdir('template'):
                if 'input' not in iFile:
                    continue
                templatePath = os.path.join('template', iFile)
                if iFile.endswith('.isoin'):
                    _write_isoin(
                        templatePath,
                        os.path.join(dirName, f"{dirName}input.isoin"),
                        dirName,
                        parsed['AbundanceRatio'],
                        newFileNames,
                    )
                else:
                    shutil.copy(templatePath, os.path.join(dirName, iFile))
 
if __name__ == "__main__":
    # Command-line entry point: collect the track files, the abundance
    # directory and the overwrite flag, then hand them to main().
    parser = argparse.ArgumentParser(description="seperate hist files into directories")
    parser.add_argument("files", type=str, nargs='+', help="all files")
    parser.add_argument("-f", "--force", action="store_true", help="delete files if they exist")
    # Required: main() lists this directory and joins file names onto it, so
    # a missing value (None) cannot produce any output directory.
    parser.add_argument("--abundir", type=str, required=True, help="path to abundance files")

    args = parser.parse_args()
    main(args.files, args.abundir, force=args.force)