N+1 and Cartesian product problems


#1

I’m trying to coop with the N+1 problem one the one side and the Cartesian product problem on the other side. I’ve got the following classes to illustrate (not my real entities, but here for illustration purposes):

@Entity
public class Customer implements Serializable {
  @Id
  @Column(name = "ID", unique = true, nullable = false, precision = 10)
  @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "CUSTOMER_IDNR")
  private Long id;

  @OneToMany(mappedBy = "customer", cascade = CascadeType.ALL, fetch = FetchType.LAZY)
  private Set<Basket> baskets = new HashSet<>();

  @ManyToOne(fetch = FetchType.LAZY, cascade = {CascadeType.PERSIST, CascadeType.MERGE})
  @JoinColumn(name = "ADDRESS_ID")
  private Address address;

  public Long getId() {
    return id;
  }

  public void setId(Long id) {
    this.id = id;
  }

  public Set<Basket> getBaskets() {
    return baskets;
  }

  public void addBasket(Basket basket) {
    baskets.add(basket);
    basket.setCustomer(this);
  }

  public Address getAddress() {
    return address;
  }

  public void setAddress(Address address) {
    this.address = address;
  }
}

@Entity
public class Address implements Serializable {
  @Id
  @Column(name = "ID", unique = true, nullable = false, precision = 10)
  @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "ADDRESS_IDNR")
  private Long id;

  @Column(name = "STREET", length = 100, nullable = false)
  private String street;

  @Column(name = "NUMBER", precision = 4, nullable = false)
  private Integer number;

  @Column(name = "POSTALCODE", length = 10, nullable = false)
  private String postalcode;

  public Long getId() {
    return id;
  }

  public void setId(Long id) {
    this.id = id;
  }

  public String getStreet() {
    return street;
  }

  public void setStreet(String street) {
    this.street = street;
  }

  public Integer getNumber() {
    return number;
  }

  public void setNumber(Integer number) {
    this.number = number;
  }

  public String getPostalcode() {
    return postalcode;
  }

  public void setPostalcode(String postalcode) {
    this.postalcode = postalcode;
  }

  @Override
  public String toString() {
    return String.format("%s %s, %s", street, number, postalcode);
  }
}

@Entity
public class Basket implements Serializable {
  @Id
  @Column(name = "ID", unique = true, nullable = false, precision = 10)
  @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "BASKET_IDNR")
  private Long id;

  @OneToMany(mappedBy = "basket", cascade = CascadeType.ALL, fetch = FetchType.LAZY)
  private Set<Fruit> fruits = new HashSet<>();

  @OneToMany(mappedBy = "basket", cascade = CascadeType.ALL, fetch = FetchType.LAZY)
  private Set<Vegetable> vegetables = new HashSet<>();

  @ManyToOne(fetch = FetchType.LAZY, cascade = CascadeType.PERSIST)
  @JoinColumn(name = "CUSTOMER_ID")
  private Customer customer;

  public Long getId() {
    return id;
  }

  public void setId(Long id) {
    this.id = id;
  }

  public Set<Fruit> getFruits() {
    return fruits;
  }

  public void addFruit(Fruit fruit) {
    fruits.add(fruit);
    fruit.setBasket(this);
  }

  public Set<Vegetable> getVegetables() {
    return vegetables;
  }

  public void addVegetable(Vegetable vegetable) {
    vegetables.add(vegetable);
    vegetable.setBasket(this);
  }

  public Customer getCustomer() {
    return customer;
  }

  public void setCustomer(Customer customer) {
    this.customer = customer;
  }
}

@Entity
public class Fruit implements Serializable {
  @Id
  @Column(name = "ID", unique = true, nullable = false, precision = 10)
  @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "FRUIT_IDNR")
  private Long id;

  @Column(name = "NAME", length = 100, nullable = false)
  private String name;

  @ManyToOne(fetch = FetchType.LAZY, cascade = CascadeType.PERSIST)
  @JoinColumn(name = "BASKET_ID")
  private Basket basket;

  public Long getId() {
    return id;
  }

  public void setId(Long id) {
    this.id = id;
  }

  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  public Basket getBasket() {
    return basket;
  }

  public void setBasket(Basket basket) {
    this.basket = basket;
  }
}

@Entity
public class Vegetable implements Serializable {
  @Id
  @Column(name = "ID", unique = true, nullable = false, precision = 10)
  @GeneratedValue(strategy = GenerationType.SEQUENCE, generator = "VEGETABLE_IDNR")
  private Long id;

  @Column(name = "NAME", length = 100, nullable = false)
  private String name;

  @ManyToOne(fetch = FetchType.LAZY, cascade = CascadeType.PERSIST)
  @JoinColumn(name = "BASKET_ID")
  private Basket basket;

  public Long getId() {
    return id;
  }

  public void setId(Long id) {
    this.id = id;
  }

  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  public Basket getBasket() {
    return basket;
  }

  public void setBasket(Basket basket) {
    this.basket = basket;
  }
}

There could be multiple customers with multiple baskets, fruits and vegetables. I’m about to use each customer for processing and editing, hence using the enity instead of a DTO. When I try to avoid the N+1 problem, I’m issuing the following HQL-query:

SELECT customer FROM Customer customer JOIN FETCH customer.baskets basket JOIN FETCH basket.fruits JOIN FETCH basket.vegetables;

leading to the SQL-query

    select
        customer0_.ID as ID1_2_0_,
        baskets1_.ID as ID1_1_1_,
        fruits2_.ID as ID1_3_2_,
        vegetables3_.ID as ID1_4_3_,
        customer0_.ADDRESS_ID as ADDRESS_2_2_0_,
        baskets1_.CUSTOMER_ID as CUSTOMER2_1_1_,
        baskets1_.CUSTOMER_ID as CUSTOMER2_1_0__,
        baskets1_.ID as ID1_1_0__,
        fruits2_.BASKET_ID as BASKET_I3_3_2_,
        fruits2_.NAME as NAME2_3_2_,
        fruits2_.BASKET_ID as BASKET_I3_3_1__,
        fruits2_.ID as ID1_3_1__,
        vegetables3_.BASKET_ID as BASKET_I3_4_3_,
        vegetables3_.NAME as NAME2_4_3_,
        vegetables3_.BASKET_ID as BASKET_I3_4_2__,
        vegetables3_.ID as ID1_4_2__ 
    from
        Customer customer0_ 
    inner join
        Basket baskets1_ 
            on customer0_.ID=baskets1_.CUSTOMER_ID 
    inner join
        Fruit fruits2_ 
            on baskets1_.ID=fruits2_.BASKET_ID 
    inner join
        Vegetable vegetables3_ 
            on baskets1_.ID=vegetables3_.BASKET_ID
    ;

If I have 100 customers with each customer having 5 baskets with 20 fruits and 10 vegetables in each basket leads to 100x5x20x10=100.000 records, my cartesian product problem. So this is obviously not my solution.

Then I was thinking of splitting the queries is several queries to retrieve the results. The first query just retrieves the customers and for each customer a second query issues a query to retrieve all baskets, including fruits and vegetables, for a specific customer.

SELECT customer FROM Customer customer;

SELECT basket FROM Basket basket JOIN FETCH basket.fruits fruits JOIN FETCH basket.vegetables vegetables WHERE basket.customer.id = 1;

leading to

select
        customer0_.ID as ID1_2_,
        customer0_.ADDRESS_ID as ADDRESS_2_2_ 
    from
        Customer customer0_
;

select
        basket0_.ID as ID1_1_0_,
        fruits1_.ID as ID1_3_1_,
        vegetables2_.ID as ID1_4_2_,
        basket0_.CUSTOMER_ID as CUSTOMER2_1_0_,
        fruits1_.BASKET_ID as BASKET_I3_3_1_,
        fruits1_.NAME as NAME2_3_1_,
        fruits1_.BASKET_ID as BASKET_I3_3_0__,
        fruits1_.ID as ID1_3_0__,
        vegetables2_.BASKET_ID as BASKET_I3_4_2_,
        vegetables2_.NAME as NAME2_4_2_,
        vegetables2_.BASKET_ID as BASKET_I3_4_1__,
        vegetables2_.ID as ID1_4_1__ 
    from
        Basket basket0_ 
    inner join
        Fruit fruits1_ 
            on basket0_.ID=fruits1_.BASKET_ID 
    inner join
        Vegetable vegetables2_ 
            on basket0_.ID=vegetables2_.BASKET_ID 
    where
        basket0_.CUSTOMER_ID=1
;

This issues 1 query for the customers and for each customer 5x20x10=1000 records are retrieved. In total 10.000 records are retrieved from the database, ten times less then the initial query. Now when I iterate over the baskets of each customer, a third query is fired by Hibernate

for (Basket basket : customer.getBaskets()) {
   for (Fruit fruit : basket.getFruits()) {
      System.out.println("\t\tFruit: " + fruit.getName());
   }

    for (Vegetable vegetable : basket.getVegetables()) {
      System.out.println("\t\tVegetable: " + vegetable.getName());
    }
}

select
        baskets0_.CUSTOMER_ID as CUSTOMER2_1_0_,
        baskets0_.ID as ID1_1_0_,
        baskets0_.ID as ID1_1_1_,
        baskets0_.CUSTOMER_ID as CUSTOMER2_1_1_ 
    from
        Basket baskets0_ 
    where
        baskets0_.CUSTOMER_ID=?
;

I believe this is, because in the Customer entity the basket is not loaded with the first query and the second query only loads the baskets, but not adjusting the customer.

If I issued a query to join fetch the baskets with the customer, than no extra query is executed when I iterate over the baskets and fruits/vegetables.

SELECT customer FROM Customer customer JOIN FETCH customer.baskets basket;

SELECT basket FROM Basket basket JOIN FETCH basket.fruits fruits JOIN FETCH basket.vegetables vegetables WHERE basket.customer.id = 1;

select
        customer0_.ID as ID1_2_0_,
        baskets1_.ID as ID1_1_1_,
        customer0_.ADDRESS_ID as ADDRESS_2_2_0_,
        baskets1_.CUSTOMER_ID as CUSTOMER2_1_1_,
        baskets1_.CUSTOMER_ID as CUSTOMER2_1_0__,
        baskets1_.ID as ID1_1_0__ 
    from
        Customer customer0_ 
    inner join
        Basket baskets1_ 
            on customer0_.ID=baskets1_.CUSTOMER_ID
;

select
        basket0_.ID as ID1_1_0_,
        fruits1_.ID as ID1_3_1_,
        vegetables2_.ID as ID1_4_2_,
        basket0_.CUSTOMER_ID as CUSTOMER2_1_0_,
        fruits1_.BASKET_ID as BASKET_I3_3_1_,
        fruits1_.NAME as NAME2_3_1_,
        fruits1_.BASKET_ID as BASKET_I3_3_0__,
        fruits1_.ID as ID1_3_0__,
        vegetables2_.BASKET_ID as BASKET_I3_4_2_,
        vegetables2_.NAME as NAME2_4_2_,
        vegetables2_.BASKET_ID as BASKET_I3_4_1__,
        vegetables2_.ID as ID1_4_1__ 
    from
        Basket basket0_ 
    inner join
        Fruit fruits1_ 
            on basket0_.ID=fruits1_.BASKET_ID 
    inner join
        Vegetable vegetables2_ 
            on basket0_.ID=vegetables2_.BASKET_ID 
    where
        basket0_.CUSTOMER_ID=1
;

But I do not want to retrieve the baskets in the initial customer query, because I want to process each customer on its own. How can I merge the retrieved baskets (back) into the already retrieved customer entity, so the third query will not be performed?


#2

Try with the following 2 queries:

SELECT customer 
FROM Customer customer 
JOIN FETCH customer.baskets basket 
JOIN FETCH basket.fruits;

This one will fetch Customer with Basket and Fruits. Then you collect the basket identifiers in a List and pass it to the second query:

SELECT v
FROM Vegetable v 
JOIN v.backet b
WHERE b.id IN :basketIds

so that you get the List<Vegetable> associated to the previously selected Backets.


#3

Unfortunately that does not solve my problem. I need to initialize the full customer at the start of my processing logic, using the basket and its contents on different places in the code. Instead of querying it once and the relationships as a parameter to different methods, I was looking for a solution to just pass the customer and use its contents in the different parts of my logic. For a very simple test I have setup the following:

Basket basket1 = new Basket();

Fruit apple = new Fruit();
apple.setName("Apple");

Fruit pear = new Fruit();
pear.setName("Pear");

Fruit orange = new Fruit();
orange.setName("Orange");

Vegetable potato = new Vegetable();
potato.setName("Potato");

Vegetable bean = new Vegetable();
bean.setName("Bean");

Vegetable broccoli = new Vegetable();
broccoli.setName("Broccoli");

basket1.addFruit(apple);
basket1.addFruit(pear);
basket1.addVegetable(potato);
basket1.addVegetable(bean);

Basket basket2 = new Basket();
basket2.addFruit(orange);
basket2.addVegetable(broccoli);

Address address = new Address();
address.setStreet("Churchlane");
address.setNumber(10);
address.setPostalcode("A123456");

Customer customer = new Customer();
customer.addBasket(basket1);
customer.addBasket(basket2);
customer.setAddress(address);

Above queries have been defined in the following:

Query<Customer> customerQuery = session.createQuery("SELECT customer FROM Customer customer JOIN FETCH customer.baskets basket JOIN FETCH basket.fruits", Customer.class);
Customer customerResult = customerQuery.uniqueResult();

List<Long> basketIdentifiers = customerResult.getBaskets().stream().map(Basket::getId).collect(Collectors.toList());

Query<Vegetable> vegetablesQuery = session.createQuery("SELECT v FROM Vegetable v JOIN v.basket b WHERE b.id IN (:basketIdentifiers)");
vegetablesQuery.setParameterList("basketIdentifiers", basketIdentifiers);
vegetablesQuery.list();

for (Basket basket : customer.getBaskets()) {
      System.out.println("Basket: "+basket.getId());
      System.out.println("\tFruits");
      for (Fruit fruit : basket.getFruits()) {
        System.out.println("\t\tFruit: " + fruit.getName());
      }

      System.out.println("\tVegetables");
      for (Vegetable vegetable : basket.getVegetables()) {
        System.out.println("\t\tVegetable: " + vegetable.getName());
      }
      System.out.println();
      System.out.println("-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-");
      System.out.println();
    }

Executing these queries and traversing the customer baskers results in the following output.

Hibernate: 
    select
        customer0_.ID as ID1_2_0_,
        baskets1_.ID as ID1_1_1_,
        fruits2_.ID as ID1_3_2_,
        customer0_.ADDRESS_ID as ADDRESS_2_2_0_,
        baskets1_.CUSTOMER_ID as CUSTOMER2_1_1_,
        baskets1_.CUSTOMER_ID as CUSTOMER2_1_0__,
        baskets1_.ID as ID1_1_0__,
        fruits2_.BASKET_ID as BASKET_I3_3_2_,
        fruits2_.NAME as NAME2_3_2_,
        fruits2_.BASKET_ID as BASKET_I3_3_1__,
        fruits2_.ID as ID1_3_1__ 
    from
        Customer customer0_ 
    inner join
        Basket baskets1_ 
            on customer0_.ID=baskets1_.CUSTOMER_ID 
    inner join
        Fruit fruits2_ 
            on baskets1_.ID=fruits2_.BASKET_ID
Hibernate: 
    select
        vegetable0_.ID as ID1_4_,
        vegetable0_.BASKET_ID as BASKET_I3_4_,
        vegetable0_.NAME as NAME2_4_ 
    from
        Vegetable vegetable0_ 
    inner join
        Basket basket1_ 
            on vegetable0_.BASKET_ID=basket1_.ID 
    where
        basket1_.ID in (
            ? , ?
        )

-------------------------------


Basket: 1
	Fruits
		Fruit: Apple
		Fruit: Pear
	Vegetables
Hibernate: 
    select
        vegetables0_.BASKET_ID as BASKET_I3_4_0_,
        vegetables0_.ID as ID1_4_0_,
        vegetables0_.ID as ID1_4_1_,
        vegetables0_.BASKET_ID as BASKET_I3_4_1_,
        vegetables0_.NAME as NAME2_4_1_ 
    from
        Vegetable vegetables0_ 
    where
        vegetables0_.BASKET_ID=?
		Vegetable: Bean
		Vegetable: Potato

-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-

Basket: 2
	Fruits
		Fruit: Orange
	Vegetables
Hibernate: 
    select
        vegetables0_.BASKET_ID as BASKET_I3_4_0_,
        vegetables0_.ID as ID1_4_0_,
        vegetables0_.ID as ID1_4_1_,
        vegetables0_.BASKET_ID as BASKET_I3_4_1_,
        vegetables0_.NAME as NAME2_4_1_ 
    from
        Vegetable vegetables0_ 
    where
        vegetables0_.BASKET_ID=?
		Vegetable: Broccoli

For each basket, the vegetables-query is executed.


#4

I was looking for a solution to just pass the customer and use its contents in the different parts of my logic

You can also use the FetchMode.SUBSELECT as explained in the User Guide to address that N+1 query issue.

Unfortunately that does not solve my problem.

Of course, it solves your problem. The only thing that you need to do is to detached the entire hierarchy and set the Vegetables to the Basket instances based on their basket association.