我应该使用哪种 JPA + Hibernate 数据类型来支持 PostgreSQL 数据库中的 vector(向量)扩展,以便能够通过 JPA 实体创建嵌入向量(embeddings)?
CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3));
我应该使用哪种 JPA + Hibernate 数据类型来支持 PostgreSQL 数据库中的 vector(向量)扩展,以便能够通过 JPA 实体创建嵌入向量(embeddings)?
CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3));
<dependency>
<groupId>io.hypersistence</groupId>
<artifactId>hypersistence-utils-hibernate-55</artifactId>
<version>3.5.0</version>
</dependency>
import com.fasterxml.jackson.annotation.JsonInclude;
import io.hypersistence.utils.hibernate.type.json.JsonType;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.hibernate.annotations.Type;
import org.hibernate.annotations.TypeDef;
import javax.persistence.*;
import java.util.List;
/**
 * JPA entity mapped to the {@code items} table.
 *
 * <p>The {@code embedding} attribute is persisted in a column declared as
 * {@code vector} and is converted through the {@code json} type definition
 * registered on this class (backed by {@link JsonType}).
 */
@Entity
@Table(name = "items")
@TypeDef(typeClass = JsonType.class, name = "json")
@JsonInclude(JsonInclude.Include.NON_NULL)
@NoArgsConstructor
@Data
public class Item {

    /** Primary key, generated with the {@code IDENTITY} strategy. */
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Id
    private Long id;

    /** Components of the embedding; handled by the {@code json} type and stored in a {@code vector} column. */
    @Column(columnDefinition = "vector")
    @Type(type = "json")
    private List<Double> embedding;
}
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Repository for {@link Item} rows, adding native nearest-neighbor queries
 * that order rows with the {@code <->} operator on the {@code embedding} column.
 */
public interface ItemRepository extends JpaRepository<Item, Long> {

    /**
     * Finds the five rows whose embedding is nearest to the supplied vector.
     *
     * @param value textual form of the vector, for example {@code "[1,2,3]"}
     * @return up to five items ordered by {@code embedding <-> value}
     */
    // The escaped "\\:\\:" below is the PostgreSQL :: cast operator. An equivalent
    // form using cast(...) also works:
    //   "SELECT * FROM items ORDER BY embedding <-> cast(? as vector) LIMIT 5"
    @Query(
            value = "SELECT * FROM items ORDER BY embedding <-> ? \\:\\:vector LIMIT 5",
            nativeQuery = true)
    List<Item> findNearestNeighbors(String value);

    /**
     * Finds the five rows nearest to an existing row of the same table,
     * excluding that row itself.
     *
     * @param id primary key of the row whose embedding is used as the reference
     * @return up to five other items ordered by distance to the reference embedding
     */
    @Query(
            value = "SELECT * FROM items WHERE id != :id ORDER BY embedding <-> (SELECT embedding FROM items WHERE id = :id) LIMIT 5",
            nativeQuery = true)
    List<Item> findNearestNeighbors(Long id);
}
// Repository under test; marked @Autowired so the container is expected to inject it.
@Autowired
private ItemRepository itemRepository;
/**
 * Saves one {@link Item} whose embedding holds three random values.
 * The test is annotated {@code @Rollback(false)}.
 */
@Test
@Rollback(false)
@Transactional
public void createItem() {
    final Random generator = new Random();
    final List<Double> components = new ArrayList<>();

    // Three components, matching the vector(3) column of the items table.
    int remaining = 3;
    while (remaining-- > 0) {
        components.add(generator.nextDouble());
    }

    final Item entity = new Item();
    entity.setEmbedding(components);
    itemRepository.save(entity);
}
/** Loads every row through the repository and prints the resulting list. */
@Test
public void loadItems() {
    System.out.println(itemRepository.findAll());
}
/** Queries the neighbors of a fixed three-component vector and prints them. */
@Test
public void findNearestNeighbors() {
    // A String argument selects the vector-literal overload of the repository method.
    final List<Item> neighbors = itemRepository.findNearestNeighbors("[0.1, 0.2, 0.3]");
    System.out.println(neighbors);
}
<!-- https://mvnrepository.com/artifact/io.hypersistence/hypersistence-utils-hibernate-62 -->
<dependency>
<groupId>io.hypersistence</groupId>
<artifactId>hypersistence-utils-hibernate-62</artifactId>
<version>3.5.3</version>
</dependency>
import jakarta.persistence.*;
import org.hibernate.annotations.Type;
import io.hypersistence.utils.hibernate.type.json.JsonType;
import java.util.List;
import java.util.UUID;
// JPA entity for the "items" table (jakarta.persistence variant).
@Entity
@Table(name = "items")
public class Item {
// Primary key stored in the "id" column; no generation strategy is declared here.
@Id
@Column(name = "id")
private UUID id;
// Embedding components: handled by JsonType and stored in a column declared as "vector".
@Basic
@Type(JsonType.class)
@Column(name = "embedding", columnDefinition = "vector")
private List<Double> embedding;
...
}
/** Repository for {@link Item} rows keyed by {@link UUID}. */
@Repository
public interface ItemRepository extends JpaRepository<Item, UUID> {

    /**
     * Finds the three rows whose embedding is nearest to the supplied vector.
     *
     * @param embedding textual form of the vector; the query casts it to {@code vector}
     * @return up to three items ordered by {@code embedding <-> cast(? as vector)}
     */
    @Query(
            value = "SELECT * FROM items ORDER BY embedding <-> cast(? as vector) LIMIT 3",
            nativeQuery = true)
    List<Item> findNearestNeighbors(String embedding);
}