"""Toy Monte Carlo model: how much of child earnings do parental earnings explain?

Each simulated family has a mother, a father and one child.  Every person has
an "earning genotype" and an "earning phenotype" (realized earnings), both
scaled to have unit variance.  The script prints the sample covariance between
the (normalized) combined parental phenotype and the child's phenotype.
"""

import math

import numpy as np

# H is the genotype -> phenotype path coefficient.  H**2 = 0.50 is the assumed
# heritability, i.e. half of the variance in earnings is due to genes.
H = math.sqrt(0.50)
N = 100000  # number of simulated mother/father/child trios

# Standard deviation of the non-genetic part of the phenotype, chosen so that
# phenotype variance is H**2 + (1 - H**2) = 1.
ENVIRONMENTAL_SD = math.sqrt(1 - H**2)


def _phenotype_from(genotype):
    """Return phenotypes: genotype scaled by H plus independent N(0, ENVIRONMENTAL_SD) noise."""
    return H * genotype + np.random.normal(0, ENVIRONMENTAL_SD, N)


# Parents' genotypes are independent standard normals (no assortative mating
# is modelled).
mom_earning_genotype = np.random.normal(0, 1, N)
dad_earning_genotype = np.random.normal(0, 1, N)

# The child gets the mid-parent genotype plus independent noise of variance
# 0.5, which keeps the child's genotype variance at 0.25 + 0.25 + 0.5 = 1.
child_earning_genotype = (
    0.5 * mom_earning_genotype
    + 0.5 * dad_earning_genotype
    + np.random.normal(0, math.sqrt(0.5), N)
)

# Draw order (mom, dad, child) matches the original script.
mom_earning_phenotype = _phenotype_from(mom_earning_genotype)
dad_earning_phenotype = _phenotype_from(dad_earning_genotype)
child_earning_phenotype = _phenotype_from(child_earning_genotype)

# The sum of two independent unit-variance phenotypes has variance 2, so
# dividing by sqrt(2) brings combined parental earnings back to unit variance.
parent_earning_phenotype = mom_earning_phenotype + dad_earning_phenotype
parent_earnings_normalized = parent_earning_phenotype / math.sqrt(2)

# Both variables have unit variance by construction, so (up to sampling noise)
# their covariance is also the regression slope and the correlation.
# Expected value: H**2 * (0.5 + 0.5) / sqrt(2) = 0.5 / sqrt(2) ~= 0.354.
parent_child_earning_slope = np.cov(
    [parent_earnings_normalized, child_earning_phenotype]
)[0, 1]
print(parent_child_earning_slope)

# In this model
# * 1/8th of the variance in child income is explainable by parental income
#   (the printed slope is ~0.354, and 0.354**2 ~= 0.125)
# * half of the variance in child income is due to genes (H**2 = 0.5)
# So, Chetty's method of controlling for parental income is extremely imperfect