This page is read only. You can view the source, but not change it. Ask your administrator if you think this is wrong. ====== Astronomical Data Analysis with Advanced Clustering and Fiducial Points Identification ====== This Python script is designed for the analysis of astronomical data, focusing on the identification of fiducial points across main sequences and sub-giant branches in stellar populations. It employs various scientific libraries to process and analyze color-magnitude data of stars, using methods such as hierarchical clustering (HDBSCAN), KMeans clustering, curve fitting, and smoothing techniques (Savitzky-Golay filter, B-splines) to identify and refine fiducial sequences. The script is intended for advanced users with a background in astrophysics or astronomy, conducting research that requires precise characterization of stellar populations. <code python> import matplotlib.pyplot as plt import pickle as pkl import numpy as np from scipy.optimize import minimize from scipy.interpolate import interp1d from functools import lru_cache, wraps from scipy.spatial.distance import cdist import hdbscan from scipy.optimize import curve_fit from sklearn.cluster import KMeans from scipy.signal import savgol_filter from scipy.interpolate import splrep, BSpline def two_point(r0, ri, n=50): distance = cdist(r0.reshape(1,2), ri)[0] nSmallestDistances = distance[np.argpartition(distance, n)[:n]] return np.median(nSmallestDistances) def iter_cut(color, mag, n=100, m=1.5): # presort points based on their angle off the horizontal # this is to try to push the distance partitionaing to a better # case senario theta = np.arctan(mag/color) sortedTheta = theta.argsort() color = color[sortedTheta] mag = mag[sortedTheta] ri = np.vstack((color, mag)).T c = np.array([two_point(x,ri,n=n) for x in ri]) stdC = np.std(c) meanC = np.median(c) c2 = cdist(c.reshape(c.shape[0],1), np.array([meanC]).reshape(1,1)).flatten()/stdC color = color[c2 != 0] mag = mag[c2 != 0] c2b = c2.copy() c2 = c2[c2 != 0] condition = np.log10(c2) < m*np.median(np.log10(c2))#-1.5 colorCut = color[condition] magCut = mag[condition] c2Cut = c2[condition] return { "color": colorCut, "mag": magCut, "sigDistCut": c2Cut, "corr": c, "log10_sigDist": np.log10(c2b) } def smooth_sequence(color, mag, s=1): sortedMag = mag.argsort() sC = color[sortedMag] sM = mag[sortedMag] tck = splrep(sM, sC, s=s) f = BSpline(*tck) return f(mag), f def bifricated_MS_fiducials(color, mag, binSize=0.1): magBins = np.arange(mag.min(), mag.max()+binSize, binSize) fiducialPoints = np.empty(shape=(2,magBins.shape[0]-1,2)) for idx, (bb,bt) in enumerate(zip(magBins[:-1], magBins[1:])): cMag = mag[(mag >= bb) & (mag < bt)] cCol = color[(mag >= bb) & (mag < bt)] points = np.vstack((cCol, cMag)).T clusters = KMeans(n_clusters=2,random_state=0,n_init='auto').fit(points) LRdet = 0 if np.mean(points[clusters.labels_ == 0][:,0]) < np.mean(points[clusters.labels_ == 1][:,0]) else 1 group1Points = points[clusters.labels_ == LRdet] group2Points = points[clusters.labels_ == int(not LRdet)] fiducialPoints[0,idx,0] = np.mean(group1Points[:,0]) fiducialPoints[0,idx,1] = np.mean(group1Points[:,1]) fiducialPoints[1,idx,0] = np.mean(group2Points[:,0]) fiducialPoints[1,idx,1] = np.mean(group2Points[:,1]) return fiducialPoints def sub_MSTO_fiducial(color, mag, MSFiducials, binSize=0.1): gaus = lambda x, a, b, c: a*np.exp(-(x-b)**2/(2*c**2)) belowMSColor = color[mag > MSFiducials[:,:,1].max()] belowMSMag = mag[mag > MSFiducials[:,:,1].max()] magRange = (np.percentile(belowMSMag,5), np.percentile(belowMSMag,95)) belowMSColor = belowMSColor[(belowMSMag >= magRange[0]) & (belowMSMag < magRange[1])] belowMSMag = belowMSMag[(belowMSMag >= magRange[0]) & (belowMSMag < magRange[1])] subMSMags = np.arange(belowMSMag.min(), belowMSMag.max()+binSize, binSize) fiducialPoints = np.empty(shape=(subMSMags.shape[0]-1,2)) for idx, (bb, bt) in enumerate(zip(subMSMags[:-1], subMSMags[1:])): cC = belowMSColor[(belowMSMag >= bb) * (belowMSMag < bt)] hist,bins = np.histogram(cC, density=True,bins='fd') centers = (bins[1:] + bins[:-1])/2 try: fit, covar = curve_fit(gaus, centers, hist) fiducialColor = fit[1] except RuntimeError: print("FAIL") fiducialColor = np.median(cC) fiducialPoints[idx, 0] = fiducialColor fiducialPoints[idx, 1] = (bb+bt)/2 return fiducialPoints def bifricated_super_MSTO_fiducial(color, mag, MSFiducials, binSize=0.1): aboveMSColor = color[mag < MSFiducials[:,:,1].min()] aboveMSMag = mag[mag < MSFiducials[:,:,1].min()] magRange = (np.percentile(aboveMSMag,5), np.percentile(aboveMSMag,95)) colorRange = (np.percentile(aboveMSColor,5), np.percentile(aboveMSColor,95)) aboveMSColor = aboveMSColor[(aboveMSMag >= magRange[0]) & (aboveMSMag < magRange[1])] aboveMSMag = aboveMSMag[(aboveMSMag >= magRange[0]) & (aboveMSMag < magRange[1])] aboveMSMag = aboveMSMag[(aboveMSColor >= colorRange[0]) & (aboveMSColor < colorRange[1])] aboveMSColor = aboveMSColor[(aboveMSColor >= colorRange[0]) & (aboveMSColor < colorRange[1])] r = iter_cut(aboveMSColor.values, aboveMSMag.values, n=50,m=0) superMSMagBins = np.arange(r["mag"].min(), r["mag"].max()+binSize, binSize) fiducialPoints = np.empty(shape=(2,superMSMagBins.shape[0]-1,2)) for idx, (bb, bt) in enumerate(zip(superMSMagBins[:-1], superMSMagBins[1:])): cC = r["color"][(r["mag"] >= bb) & (r["mag"] < bt)] cM = r["mag"][(r["mag"] >= bb) & (r["mag"]< bt)] colorRange = (np.percentile(cC,5), np.percentile(cC,95)) cM = cM[(cC >= colorRange[0]) & (cC < colorRange[1])] cC = cC[(cC >= colorRange[0]) & (cC < colorRange[1])] points = np.vstack((cC, cM)).T clusters = KMeans(n_clusters=2, random_state=0,n_init='auto').fit(points) LRdet = 0 if np.mean(points[clusters.labels_ == 0][:,0]) < np.mean(points[clusters.labels_ == 1][:,0]) else 1 group1Points = points[clusters.labels_ == LRdet] group2Points = points[clusters.labels_ == int(not LRdet)] fiducialPoints[0,idx,0] = np.mean(group1Points[:,0]) fiducialPoints[0,idx,1] = np.mean(group1Points[:,1]) fiducialPoints[1,idx,0] = np.mean(group2Points[:,0]) fiducialPoints[1,idx,1] = np.mean(group2Points[:,1]) return fiducialPoints def sequence_mid_point(A, B, n=3,s=0.5): AltB = True if A[:,1].min() > B[:,1].max() else False sortedA = np.sort(A.copy()) sortedB = np.sort(B.copy()) if AltB: Ap = A[:n] Bp = B[-n:] else: Ap = A[-n:] Bp = B[:n] fiducialWindow = np.vstack((Ap,Bp)) fiducialWindow = fiducialWindow[fiducialWindow[:,0].argsort()] _, fSmooth = smooth_sequence(fiducialWindow[:,0], fiducialWindow[:,1],s=s) if AltB: A[:n, 0] = fSmooth(A[:n, 1]) B[-n:, 0] = fSmooth(B[-n:, 1]) else: A[-n:, 0] = fSmooth(A[-n:, 1]) B[:n, 0] = fSmooth(B[:n, 1]) return A, B def fiducial_points(color, mag): MSTOR = iter_cut(color, mag, m=1.5) mainSequences = np.vstack((MSTOR["color"], MSTOR["mag"])).T MSFiducials = bifricated_MS_fiducials(mainSequences[:,0], mainSequences[:,1]) subMSFiducialsRoot = sub_MSTO_fiducial(color,mag,MSFiducials,binSize=0.1) subMSFiducials = np.empty(shape=(2,subMSFiducialsRoot.shape[0],2)) subMSFiducials[0] = subMSFiducialsRoot subMSFiducials[1] = subMSFiducialsRoot superMSFiducials = bifricated_super_MSTO_fiducial(color, mag, MSFiducials) for sequenceID, (MS, RGB) in enumerate(zip(MSFiducials, superMSFiducials)): MSsmooth, RGBsmooth = sequence_mid_point(MS, RGB, n=4,s=1) MSFiducials[sequenceID] = MSsmooth superMSFiducials[sequenceID] = RGBsmooth for sequenceID, sequence in enumerate(superMSFiducials): superMSSmoothColor, _ = smooth_sequence(sequence[:, 0], sequence[:, 1], s=0.1) superMSFiducials[sequenceID, :, 0] = superMSSmoothColor for sequenceID, (PMS, MS) in enumerate(zip(subMSFiducials, MSFiducials)): PMSsmooth, MSsmooth = sequence_mid_point(PMS, MS,n=4,s=1) MSFiducials[sequenceID] = MS subMSFiducials[sequenceID] = PMS for sequenceID, sequence in enumerate(MSFiducials): MSSmoothColor, _ = smooth_sequence(sequence[:, 0], sequence[:, 1], s=0) MSFiducials[sequenceID, :, 0] = MSSmoothColor tfp = MSFiducials.shape[1] + subMSFiducials.shape[1] + superMSFiducials.shape[1] fiducialPoints = np.empty( shape=( MSFiducials.shape[0], tfp, 2 ) ) for sequenceID, _ in enumerate(fiducialPoints): fiducialPoints[sequenceID] = np.vstack( (subMSFiducials[sequenceID],MSFiducials[sequenceID],superMSFiducials[sequenceID]) ) return fiducialPoints def fiducial_functions(color, mag, s=0): fiducialPoints = fiducial_points(color, mag) functions = list() for sequenceID, sequence in enumerate(fiducialPoints): functions.append(interp1d(sequence[:,1], sequence[:,0], bounds_error=False, fill_value="extrapolate")) return {i+1: f for i, f in enumerate(functions)} </code>