diff --git a/adloc/_ransac.py b/adloc/_ransac.py index 3d125ef..d6d5550 100644 --- a/adloc/_ransac.py +++ b/adloc/_ransac.py @@ -422,6 +422,7 @@ def fit(self, X, y, sample_weight=None, **kwargs): # number of data samples n_samples = X.shape[0] sample_idxs = np.arange(n_samples) + prob = X[:, 2] / np.sum(X[:, 2]) # X: idx_sta, type, score, amp self.n_trials_ = 0 max_trials = self.max_trials @@ -432,7 +433,8 @@ def fit(self, X, y, sample_weight=None, **kwargs): break # choose random sample set - subset_idxs = sample_without_replacement(n_samples, min_samples, random_state=random_state) + # subset_idxs = sample_without_replacement(n_samples, min_samples, random_state=random_state) + subset_idxs = np.random.choice(sample_idxs, min_samples, replace=False, p=prob) X_subset = X[subset_idxs] y_subset = y[subset_idxs] diff --git a/adloc/sacloc2d.py b/adloc/sacloc2d.py index 2c32d8c..49b58d9 100644 --- a/adloc/sacloc2d.py +++ b/adloc/sacloc2d.py @@ -124,8 +124,8 @@ def fit(self, X, y=None, event_index=0): yt = y[:, 0] opt = scipy.optimize.minimize( - # self.huber_loss_grad, - self.l2_loss_grad, + self.huber_loss_grad, + # self.l2_loss_grad, x0=self.events[event_index], method="L-BFGS-B", jac=True, @@ -180,7 +180,7 @@ def predict(self, X, event_index=0): def score(self, X, y=None, event_index=0): """ - X: data_frame with columns ["timestamp", "x_km", "y_km", "z_km", "type"] + X: idx_sta, type, score, amp """ if len(y) <= 1: return -np.inf @@ -193,13 +193,20 @@ def score(self, X, y=None, event_index=0): ya = y[:, 1] R2_t = 1 - np.sum((yt - tt) ** 2) / (np.sum((y[:, 0] - np.mean(y[:, 0])) ** 2) + 1e-6) R2_a = 1 - np.sum((ya - amp) ** 2) / (np.sum((y[:, 1] - np.mean(y[:, 1])) ** 2) + 1e-6) + # weight = X[:, 2] + # mu_yt = np.average(yt, weights=weight) + # mu_ya = np.average(ya, weights=weight) + # R2_t = 1 - np.sum(weight * (yt - tt) ** 2) / (np.sum(weight * (yt - mu_yt) ** 2) + 1e-6) + # R2_a = 1 - np.sum(weight * (ya - amp) ** 2) / (np.sum(weight * (ya - mu_ya) ** 2) + 1e-6) R2 = (R2_t + R2_a) / 2 else: tt = output yt = y R2 = 1 - np.sum((yt - tt) ** 2) / (np.sum((y - np.mean(y)) ** 2) + 1e-6) + # weight = X[:, 2] + # mu_y = np.average(yt, weights=weight) + # R2 = 1 - np.sum(weight * (yt - tt) ** 2) / (np.sum(weight * (yt - mu_y) ** 2) + 1e-6) - # print(f"{R2=}") return R2 diff --git a/adloc/utils.py b/adloc/utils.py index 0ec0647..40c9f69 100644 --- a/adloc/utils.py +++ b/adloc/utils.py @@ -14,18 +14,33 @@ def invert(picks, stations, config, estimator, event_index, event_init): """ def is_model_valid(estimator, X, y): - check = True - check = check and (estimator.events[0][2] > 1.0) # depth > 1.0 km - check = check and (estimator.score(X, y) > config["min_score"]) - return check + """ + X: idx_sta, type, score, amp + """ + # n0 = np.sum(X[:, 1] == 0) # P + # n1 = np.sum(X[:, 1] == 1) # S + n0 = np.sum(X[X[:, 1] == 0, 2]) + n1 = np.sum(X[X[:, 1] == 1, 2]) + if not (n0 >= config["min_p_picks"] and n1 >= config["min_s_picks"]): + return False + + if estimator.events[0][2] < 1.0: # depth > 1.0 km + return False + + if estimator.score(X, y) < config["min_score"]: + return False + + return True def is_data_valid(X, y): """ - X: idx_sta, type, score + X: idx_sta, type, score, amp y: t_s """ - n0 = np.sum(X[:, 1] == 0) # P - n1 = np.sum(X[:, 1] == 1) # S + # n0 = np.sum(X[:, 1] == 0) # P + # n1 = np.sum(X[:, 1] == 1) # S + n0 = np.sum(X[X[:, 1] == 0, 2]) + n1 = np.sum(X[X[:, 1] == 1, 2]) return n0 >= config["min_p_picks"] and n1 >= config["min_s_picks"] # At least min P and S picks MIN_PICKS = config["min_picks"] @@ -192,7 +207,7 @@ def invert_location(picks, stations, config, estimator, events_init=None, iter=0 if "ncpu" in config: NCPU = config["ncpu"] else: - NCPU = min(32, mp.cpu_count() - 1) + NCPU = min(64, mp.cpu_count() * 2 - 1) jobs = [] events_inverted = []