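"""Astrophysics Population Synthesis Script Overview

This Python script is designed for astrophysical research: it generates synthetic populations of stars from a table of model data. Each population is assigned a mean age, an age standard deviation, and a binary fraction, all drawn uniformly from user-supplied ranges; the models it draws from are selected by helium mass fraction (Y), metal mass fraction (Z), opacity source, and root directory, and can be restricted to a mass range. Metallicity (Fe/H) bounds and scatter are accepted on the command line but are not currently used by the script. The number of workers is configurable (by default, the CPU count) and is passed to the population generator, which uses multiprocessing for efficient data handling. Each generated population is written as a .out file, suitable for analysis with convolutional neural networks (CNNs) or other machine learning tools that aim to quantify specific astrophysical phenomena such as the Jao Gap. The script requires the path to the model data and an output directory; note that an existing output directory is deleted and recreated on every run. The remaining parameters are optional and tailor the generated datasets.
"""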

from ThomasAstro.popSynth.fromTagger import make_lookup_dict
from ThomasAstro.popSynth.fromTagger import build_query
from ThomasAstro.popSynth.fromTagger import genPop
from ThomasAstro.popSynth.fromTagger import wrtout
 
import multiprocessing as mp
import pandas as pd
import numpy as np
 
import shutil
import os
 
from tqdm import tqdm
 
def build_parser():
    """Build the command-line parser for the synthetic population generator.

    Returns
    -------
    argparse.ArgumentParser
        Parser exposing the model-data path, the sampling ranges for age,
        age spread and binarity, the model query options, and the output
        and worker settings.
    """
    # Imported locally so the file stays runnable without touching the
    # module-level import block.
    import argparse

    parser = argparse.ArgumentParser(description="Generate Sample Data set of histograms from an input population for a sample of ages for use with some CNN Jao Gap Quantifier")
    parser.add_argument("mdata", type=str, help="model data to pull from")

    parser.add_argument("--young", type=float, default=2, help="Age, in Gyrs, that the youngest sample population will be")
    parser.add_argument("--old", type=float, default=10, help="Age, in Gyrs, that the oldest sample population will be")

    parser.add_argument("--stdLow", help="lower bound for age stanard deviation to give stars", type=float, default=0.1)
    parser.add_argument("--stdHigh", help="upper bound for stanrad deviation to give population age", type=float, default=2)

    parser.add_argument("--binarityLow", help="binary number fraction low end", default=0.01, type=float)
    parser.add_argument("--binarityHigh", help="binary number fraction high end", default=0.7, type=float)

    # NOTE(review): the three FeH options are parsed but never read by
    # main(); confirm whether they should be forwarded to the generator.
    parser.add_argument("--FeHLow", help="Low end of FeH for populations", default=-1, type=float)
    parser.add_argument("--FeHHigh", help="High end of FeH used for poopulations", default=1.5, type=float)
    parser.add_argument("--stdFeH", help="standard deviation of FeH values within a cluster", default=0.1, type=float)

    parser.add_argument("--pops", type=int, default=100, help="number of populations to generate training data for")
    parser.add_argument("--num", type=int, help="number of stars per population", default=500)

    parser.add_argument("-o", "--output", type=str, help="directory to save fake CMD too", default="fCMDDir")
    parser.add_argument("-Y", default=None, type=float, help="Helium mass fraction to query")
    parser.add_argument("-Z", type=float, default=None, help="Metal mass fraction to query")
    parser.add_argument("--rootDir", help="root dir to use as the primary query seperator", type=str, default=None)
    parser.add_argument("--opacSource", type=str, default=None)
    parser.add_argument("--massRange", nargs=2, default=(None,None),type=float,help="mass range to pull population from")
    parser.add_argument("--threads", type=int, default=mp.cpu_count(), help="number of workers to use")

    return parser


def _check_bounds(parser, args):
    """Exit with a usage error if a sampling range is inverted or --pops is negative."""
    bounds = (
        ("--young", args.young, "--old", args.old),
        ("--stdLow", args.stdLow, "--stdHigh", args.stdHigh),
        ("--binarityLow", args.binarityLow, "--binarityHigh", args.binarityHigh),
    )
    for lowFlag, low, highFlag, high in bounds:
        # numpy documents uniform(low, high) as undefined when high < low,
        # so refuse the run instead of sampling from an inverted range.
        if low > high:
            parser.error(f"{lowFlag} ({low}) must not exceed {highFlag} ({high})")
    if args.pops < 0:
        parser.error(f"--pops ({args.pops}) must not be negative")


def main(argv=None):
    """Generate the requested synthetic populations and write one file each.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments without the program name. When None,
        argparse reads them from ``sys.argv``.

    Notes
    -----
    An existing output directory is removed, together with its contents,
    before the new populations are written. Invalid arguments terminate
    the process through ``parser.error`` (``SystemExit`` with code 2)
    before any file is read or deleted.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    _check_bounds(parser, args)

    # One independent uniform draw per population for each parameter.
    meanAges = np.random.uniform(low=args.young, high=args.old, size=(args.pops,))
    stdAges = np.random.uniform(low=args.stdLow, high=args.stdHigh, size=(args.pops,))
    binarity = np.random.uniform(low=args.binarityLow, high=args.binarityHigh, size=(args.pops,))

    tagged = pd.read_csv(args.mdata)
    lookupDict = make_lookup_dict(tagged.set_index("UUID"))

    qd = {'opac': args.opacSource, 'Y': args.Y, 'Z': args.Z, 'dir2': args.rootDir}
    query = build_query(tagged, qd)

    # Start from an empty output directory: anything left from a previous
    # run is deleted.
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    # makedirs (rather than mkdir) so a nested path such as "runs/a/b"
    # works even when its parent directories do not exist yet.
    os.makedirs(args.output)

    for meanAge, stdAge, bf in zip(meanAges, stdAges, binarity):
        CMDData = genPop(tagged, lookupDict, args.num, meanAge, stdAge, bf, query, args.massRange, args.threads)
        # NOTE(review): parameters are rounded to two decimals in the file
        # name, so two populations that round identically target the same
        # path.
        ofName = f"fCMD_{meanAge:0.2f}_{stdAge:0.2f}_{bf:0.2f}.out"
        outPath = os.path.join(args.output, ofName)
        wrtout(CMDData, outPath)


if __name__ == "__main__":
    main()