Perception Encoder
Collection
OpenCLIP (PE Core image + text) and timm PE Core, Spatial, Lang (ViT only) weights. NOTE: These weights do not work with the original modeling code. • 19 items • Updated • 7
# Load the checkpoint directly via the Auto* API (no pipeline wrapper).
from transformers import AutoModel

# dtype="auto" leaves dtype selection to transformers
# (presumably taken from the checkpoint/config -- confirm against the docs).
model = AutoModel.from_pretrained(
    "timm/vit_pe_spatial_base_patch16_512.fb",
    dtype="auto",
)
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("image-feature-extraction", model="timm/vit_pe_spatial_base_patch16_512.fb")