Feature Extraction
Transformers
ONNX
Safetensors
yar
endpoints
embedding
retrieval
hyperbolic-geometry
matryoshka
custom_code
Instructions to use YARlabs/v5_Embedding_0.5B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use YARlabs/v5_Embedding_0.5B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="YARlabs/v5_Embedding_0.5B", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("YARlabs/v5_Embedding_0.5B", trust_remote_code=True, dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
| import torch | |
| from transformers import AutoTokenizer, AutoModel | |
def lorentz_dist(u: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    """Return the hyperbolic distance between Lorentz-model vectors.

    Index 0 of the last dimension is treated as the time-like coordinate and
    the remaining indices as the space-like coordinates (metric signature
    ``- + + ...``). The distance is ``acosh(-<u, v>_L)``, where ``<u, v>_L``
    is the Minkowski inner product.

    Args:
        u: Tensor whose last dimension holds the Lorentz coordinates.
        v: Tensor with the same last dimension as ``u``; leading dimensions
            are combined with ``u`` through ordinary broadcasting.

    Returns:
        Tensor of distances with the coordinate (last) dimension removed.
    """
    u_time, u_space = u[..., :1], u[..., 1:]
    v_time, v_space = v[..., :1], v[..., 1:]

    # Minkowski inner product: negative time-like term plus the space-like dot product.
    inner_product = -u_time * v_time + (u_space * v_space).sum(dim=-1, keepdim=True)

    # acosh is only defined for arguments >= 1. Rounding error for (nearly)
    # identical points can push -inner_product just below 1, so cap the inner
    # product at -1. A scalar bound avoids building a new tensor on every call
    # and keeps the dtype and device of the inputs.
    inner_product = torch.clamp(inner_product, max=-1.0)

    return torch.acosh(-inner_product).squeeze(-1)
def main():
    """Run a small retrieval demo with 64-dimensional Lorentz embeddings.

    Loads the tokenizer and model, embeds one question and two candidate
    answers, prints the hyperbolic distance from the question to each answer,
    and reports success when the correct answer is the closer one.
    """
    # Needs internet access to download the model; use a local path such as
    # "." when running from a checkout of the model repository.
    # NOTE(review): the surrounding model-card text refers to
    # "YARlabs/v5_Embedding_0.5B" — confirm this ID resolves to the intended
    # checkpoint.
    model_id = "YARlabs/v5_Embedding"

    print(f"Loading {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
    model.eval()

    question = "What is the capital of France?"
    correct_answer = "Paris is the capital of France."
    wrong_answer = "Berlin is the capital of Germany."
    texts = [question, correct_answer, wrong_answer]

    print("Tokenizing texts...")
    inputs = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    print("Generating Matryoshka Lorentz Embeddings with dimension 64...")
    with torch.no_grad():
        embeddings = model(**inputs, target_dim=64)
    print(f"Vectors shape: {embeddings.shape}")

    # Row 0 is the question; rows 1 and 2 are the correct and wrong answers.
    query_vec = embeddings[0]
    correct_value = lorentz_dist(query_vec, embeddings[1]).item()
    wrong_value = lorentz_dist(query_vec, embeddings[2]).item()

    print(f"\nDistance (Question <-> Correct Answer): {correct_value:.4f}")
    print(f"Distance (Question <-> Wrong Answer): {wrong_value:.4f}")

    if correct_value < wrong_value:
        print("\n✅ Semantic search successfully retrieved the closest context!")
# Script entry point. For local testing, model_id inside main() can be set to ".".
if __name__ == "__main__":
    main()