Hi,
I know I’ve posted a few issues but I am now with the following.
I have document collection Matches that have documents like so:
{
match_id: 5132545461,
start_time: 1582108941,
cluster: 236,
picks_bans: [
{
is_pick: true,
hero_id: 105,
team: 0,
order: 0,
ord: 0,
match_id: 5132545461
},
{
is_pick: false,
hero_id: 66,
team: 1,
order: 1,
ord: 1,
match_id: 5132545461
},
],
}
with the following Indexes that also been detailed here
{
name: "by_leagueid",
unique: false,
serialized: true,
source: "Matches",
values: [
{
field: ["data", "leagueid"]
},
{
field: ["ref"]
}
]
}
{
name: "by_start_time",
unique: false,
serialized: true,
source: "Matches",
values: [
{
field: ["data", "start_time"]
},
{
field: ["ref"]
}
]
}
CreateIndex({
name: "hero_id_bans",
source: {
collection: Collection("Matches"),
fields: {
hero_banned: Query(
Lambda(
"matchDoc",
Map(
Filter(
Select(["data", "picks_bans"], Var("matchDoc")),
Lambda(
"pick_ban",
Equals(Select("is_pick", Var("pick_ban")), false)
)
),
Lambda("x", Select(["hero_id"], Var("x")))
)
)
)
}
},
terms: [{ binding: "hero_banned" }],
values: [{ field: "ref" }]
})
The problem I am facing is having the joins to count the amount of hero_ids picked or banned within my document collection as it scales, at first I’ve tried the following:
Let(
{
matches: Map(
Paginate(Intersection(
Join(
Range(
Match(Index("by_leagueid")),
11517,
11517
),
Lambda(["value", "ref"], Match(Index("ref_by_ref"), Var("ref")))
),
Join(
Range(
Match(Index("by_start_time")),
1574971461,
1575232551
),
Lambda(["value", "ref"], Match(Index("ref_by_ref"), Var("ref")))
)
)),
Lambda("ref", Get(Var("ref")))
),
pbs_data: Reduce(Lambda(["accumulator", "value"], Append(Var("value"), Var("accumulator"))), Array(), Map(Var("matches"),Lambda("match", Select(["data", "picks_bans"], Var("match"))))),
heroes_picks: Map(Filter(Select(['data', 0], Var("pbs_data")), Lambda("pick_ban", Equals(Select("is_pick", Var("pick_ban")), false))), Lambda("pick", Select("hero_id", Var("pick")))),
heroes_bans: Map(Filter(Select(['data', 0], Var("pbs_data")), Lambda("pick_ban", Equals(Select("is_pick", Var("pick_ban")), false))), Lambda("pick", Select("hero_id", Var("pick")))),
unique_heroes: Distinct(Union(Var("heroes_picks"),Var("heroes_bans"))),
pbs: Map(
Var("unique_heroes"),
Lambda(
"hero",
Let(
{
p_heroes: Filter(Var("heroes_picks"), Lambda("heroid", Equals(Var("hero"), Var("heroid")))),
b_heroes: Filter(Var("heroes_bans"), Lambda("heroid", Equals(Var("hero"), Var("heroid")))),
},
{
hero: Var("hero"),
total_picks: Count(Var("p_heroes")),
total_bans: Count(Var("b_heroes")),
popularity: Add(Count(Var("p_heroes")),Count(Var("b_heroes"))),
}
)
)
),
},
{
heroes_picks_bans_popularity: Var("pbs")
}
)
which has limit of the amount of matches I filter in for my variable matches, the ceiling being about 70 matches before I run into compute OPS limit of 12k but doesn’t seem impacted by the size of my match collection.
I then tried to do the following but this now instead seems to scale with my collection and is causing issues, I am in need of help to figure out how to make it scalable no matter how big my collection of match documents is.
Let(
{
matches: Intersection(
Join(
Range(
Match(Index("by_leagueid")),
11517,
11517
),
Lambda(["value", "ref"], Match(Index("ref_by_ref"), Var("ref")))
),
Join(
Range(
Match(Index("by_start_time")),
1574971461,
1575232551
),
Lambda(["value", "ref"], Match(Index("ref_by_ref"), Var("ref")))
)
),
unique_heroes: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129],
hero_id_pbp: Map(
Var("unique_heroes"),
Lambda(
"hero_id",
Let(
{
hero_id_pick_count: Count(
Intersection(
Var("matches"),
Match(Index("hero_id_picks"), Var("hero_id"))
)
),
hero_id_ban_count: Count(
Intersection(
Var("matches"),
Match(Index("hero_id_bans"), Var("hero_id"))
)
),
picks_bans_count: Add(
Var("hero_id_pick_count"),
Var("hero_id_ban_count")
)
},
{
hero_id: Var("hero_id"),
no_picks: Var("hero_id_pick_count"),
no_bans: Var("hero_id_ban_count"),
popularity: Var("picks_bans_count")
}
)
)
)
},
{ heroes_stats: Var("hero_id_pbp") }
)
Currently for collection of around 60 matches, the queries get the following cost results: