import numpy as np


def fractile(l, p, C):
    """Find the p'th fractile of l for various interpolation schemes.

    All schemes interpolate linearly between neighbouring order statistics
    but vary in their definition of the range of the CDF. The (0-based)
    fractional position in the sorted data is

        x = (N + 1 - 2*C)*p + C - 1,

    clipped to [0, N - 1], so fractiles outside the range covered by a
    scheme evaluate to the smallest/largest data value.

    Args:
        l (array_like): The numbers to find fractiles for. Need not be
            sorted; multi-dimensional input is flattened.
        p (float or array_like): The fractiles of interest (as floats
            between 0 and 1).
        C (float): Interpolation scheme, given as a float between 0 and 1.
            0 corresponds to what Excel's PERCENTILE.EXC calculates.
            1/2 corresponds to what MATLAB does.
            1 corresponds to what NumPy/Excel's PERCENTILE.INC calculates.

    Returns:
        numpy.ndarray or numpy.float64: The interpolated fractile values,
        with the same shape as p (a NumPy scalar when p is a scalar).

    Raises:
        ValueError: If l contains no values.
    """
    s = np.sort(l, axis=None)
    N = s.size
    if N == 0:
        # Previously an empty input silently produced 0.0 (the dummy value).
        raise ValueError("fractile() requires at least one data value")

    # Accept plain floats and lists as well as arrays for p.
    p = np.asarray(p, dtype=float)
    x = np.clip((N + 1 - 2*C)*p + C - 1, 0, N - 1)

    lower = x.astype(np.intp)  # x >= 0, so truncation equals floor.
    frac = x % 1

    # When x == N - 1 the upper neighbour lower + 1 is out of range. Its
    # weight frac is 0 there, so a dummy value keeps the indexing valid
    # without affecting the result.
    s = np.append(s, 0)
    return s[lower]*(1 - frac) + s[lower + 1]*frac


def main():
    """Plot the three interpolation schemes for a small sample and save it.

    Writes the figure to 'percentile_interpolation.png' in the current
    working directory.
    """
    # The plotting libraries are only needed when running as a script, so
    # they are imported here; fractile() itself then depends on NumPy alone.
    import matplotlib.pyplot as plt
    # NOTE(review): seaborn is never referenced by name; presumably it was
    # imported for its effect on matplotlib styling -- confirm before removing.
    import seaborn as sns  # noqa: F401

    x = np.array([15, 20, 35, 40, 50])
    p = np.arange(0, 100.1, 0.1)  # Percent ranks from 0 to 100.

    for v in (0, 0.5, 1):
        plt.plot(p, fractile(x, p/100, v))

    plt.title('Three different linear interpolation schemes')
    plt.xlabel('Percent rank')
    plt.ylabel('Percentile value')
    plt.xlim(0, 100)
    plt.ylim(14, 51)
    plt.legend(['$C = 0$', '$C = 1/2$', '$C = 1$'], loc='lower right')
    plt.savefig('percentile_interpolation.png', dpi=400)


if __name__ == "__main__":
    main()