3. Algorithm for binning data

Here, we’ll discuss the algorithm for binning data.

library(quollr)
library(ggplot2)
library(dplyr)

First, you need to obtain the number of bins along the x and y axes in the hexagonal grid. Here, you need to decide which is the radius of outer circle of the hexagon (hex_size), total buffer along the axes.

Next, by passing the preprocessed 2D embedding data and hexagonal grid configurations, you can obtain the hexagonal binning information like centroid coordinates, hexagonal polygon coordinates, the standardise counts within each hexagon etc. Other important parameters are hexagonal grid’s starting coordinates.

## Data set with all possible centroids in the hexagonal grid
all_centroids_df <- as.data.frame(do.call(cbind, hb_obj$centroids))
glimpse(all_centroids_df)
#> Rows: 40
#> Columns: 3
#> $ hexID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1…
#> $ c_x   <dbl> -0.1732051, 0.1732051, 0.5196152, 0.8660254, 1.2124356, 0.000000…
#> $ c_y   <dbl> -0.15, -0.15, -0.15, -0.15, -0.15, 0.15, 0.15, 0.15, 0.15, 0.15,…

## Generate all coordinates of hexagons
hex_grid <- as.data.frame(do.call(cbind, hb_obj$hex_poly))
glimpse(hex_grid)
#> Rows: 240
#> Columns: 3
#> $ hex_poly_id <dbl> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4…
#> $ x           <dbl> 0.0000000, 0.0000000, -0.1732051, -0.3464102, -0.3464102, …
#> $ y           <dbl> -0.05040708, -0.24959292, -0.34918584, -0.24959292, -0.050…

## To obtain the standardise counts within hexbins
counts_df <- as.data.frame(do.call(cbind, hb_obj$std_cts))
df_bin_centroids <- extract_hexbin_centroids(centroids_df = all_centroids_df, 
                                             counts_df = counts_df)
ggplot(data = hex_grid, aes(x = x, y = y)) + 
  geom_polygon(fill = "white", color = "black", aes(group = hex_poly_id)) +
  geom_point(data = all_centroids_df, aes(x = c_x, y = c_y), color = "red") +
  coord_fixed()

ggplot(data = hex_grid, aes(x = x, y = y)) + 
  geom_polygon(fill = "white", color = "black", aes(group = hex_poly_id)) +
  geom_point(data = all_centroids_df, aes(x = c_x, y = c_y), color = "red") +
  geom_point(data = df_bin_centroids, aes(x = c_x, y = c_y), color = "purple") +
  coord_fixed()

ggplot(data = hex_grid, aes(x = x, y = y)) + 
  geom_polygon(fill = "white", color = "black", aes(group = hex_poly_id)) +
  geom_point(data = s_curve_noise_umap_scaled, aes(x = UMAP1, y = UMAP2), color = "blue") +
  coord_fixed()

hex_grid_with_counts <- dplyr::left_join(hex_grid, counts_df, by = c("hex_poly_id" = "hb_id"))

ggplot(data = hex_grid_with_counts, aes(x = x, y = y)) +
  geom_polygon(color = "black", aes(group = hex_poly_id, fill = std_counts)) +
  geom_text(data = all_centroids_df, aes(x = c_x, y = c_y, label = hexID)) +
  scale_fill_viridis_c(direction = -1, na.value = "#ffffff") +
  coord_fixed()