File size: 5,327 Bytes
f27679f
 
b8c4528
 
 
1185ec1
f27679f
6967c22
f70dd7e
29f166e
 
f27679f
 
 
 
 
b8c4528
4c34e70
 
f27679f
4c34e70
f27679f
8f2b05f
b8c4528
f27679f
b8c4528
 
 
4c34e70
 
 
 
 
 
 
b8c4528
 
 
 
4c34e70
 
 
 
 
f27679f
8f2b05f
 
b8c4528
 
f27679f
8f2b05f
 
 
 
 
b8c4528
8f2b05f
 
 
 
 
 
 
f27679f
29f166e
 
 
 
b8c4528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f2b05f
 
 
f27679f
8f2b05f
4c34e70
8f2b05f
 
 
 
 
 
 
4c34e70
8f2b05f
4c34e70
8f2b05f
 
 
 
 
 
 
4c34e70
8f2b05f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c34e70
 
29f166e
 
8f2b05f
29f166e
 
 
 
 
 
 
 
 
 
 
 
8f2b05f
 
 
 
 
 
 
 
f27679f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"use server"

// import { distance } from "fastest-levenshtein"
import MiniSearch from "minisearch"

import { VideoInfo } from "@/types/general"

import { getVideoIndex } from "./getVideoIndex"
import { extendVideosWithStats } from "./extendVideosWithStats"
import { isHighQuality } from "../utils/isHighQuality"
import { isAntisocial } from "../utils/isAntisocial"

// Upper bound on the number of videos getVideos() hands back in one call:
// it is both the default value of `maxVideos` and the cap applied when
// the final list is sliced.
const HARD_LIMIT = 100

/** Lower-cases and trims each tag, dropping the ones that end up empty. */
function normalizeTags(tags: string[]): string[] {
  return tags.map(tag => tag.toLowerCase().trim()).filter(tag => tag)
}

/** Unbiased in-place Fisher-Yates shuffle; returns the same array. */
function shuffleInPlace<T>(items: T[]): T[] {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1))
    const swapped = items[i]
    items[i] = items[j]
    items[j] = swapped
  }
  return items
}

/**
 * Returns published videos from the video index, optionally searched,
 * filtered by tags, ordered, and capped.
 *
 * Pipeline:
 *  1. load the "published" index and drop the ids listed in `ignoreVideoIds`
 *  2. order the candidates according to `sortBy`
 *     ("match" = full-text search on label/description/tags, best match first)
 *  3. keep only videos carrying at least one of the `mandatoryTags` (if any)
 *  4. put videos carrying at least one of the `niceToHaveTags` first,
 *     the remaining valid videos act as filler behind them
 *  5. drop videos flagged by `isAntisocial`
 *  6. cap to `min(HARD_LIMIT, maxVideos)` and attach stats
 *  7. return high-quality videos (per `isHighQuality`) before the others
 *
 * @returns the selected videos, extended with their stats
 * @throws rethrows any underlying failure, unless `neverThrow` is true, in
 *         which case the error is logged and an empty array is returned
 */
export async function getVideos({
  query = "",
  mandatoryTags = [],
  niceToHaveTags = [],
  sortBy = "date",
  ignoreVideoIds = [],
  maxVideos = HARD_LIMIT,
  neverThrow = false,
  renewCache = true,
}: {
  // optional search query (only used when sortBy is "match")
  query?: string

  // a video is kept only if it carries at least one of those tags
  mandatoryTags?: string[]

  // tags that we should try to use to filter the videos,
  // but it isn't a hard limit - TODO: use some semantic search here?
  niceToHaveTags?: string[]

  sortBy?:
    | "random" // for the home
    | "date" // most recent first
    | "match" // how close we are from the query

  // ignore some ids - this is used to not show the same videos again
  // eg. videos already watched, or disliked etc
  ignoreVideoIds?: string[]

  // maximum number of videos to return (never more than HARD_LIMIT)
  maxVideos?: number

  // when true, failures are logged and an empty list is returned
  neverThrow?: boolean

  // forwarded as-is to getVideoIndex()
  renewCache?: boolean
}): Promise<VideoInfo[]> {
  try {
    // the index is gonna grow more and more,
    // but in the future we will use some DB eg. Prisma or sqlite
    const published = await getVideoIndex({
      status: "published",
      renewCache,
    })

    let candidates: VideoInfo[] = Object.values(published)

    if (ignoreVideoIds.length) {
      // a Set keeps the exclusion linear even with long ignore lists
      const ignoredIds = new Set(ignoreVideoIds)
      candidates = candidates.filter(video => !ignoredIds.has(video.id))
    }

    if (sortBy === "match") {
      // now obviously we are going to migrate to a database search instead,
      // maybe a bit of vector search too,
      // but let's say that for now this is good enough
      const miniSearch = new MiniSearch({
        fields: ['label', 'description', 'tags'], // fields to index for full-text search
        storeFields: ['id'] // fields to return with search results
      })

      miniSearch.addAll(candidates)

      const q = query.trim().toLowerCase()

      // mini search has plenty of options, see:
      // https://www.npmjs.com/package/minisearch
      const results = miniSearch.search(q, {
        prefix: true, // "moto" will match "motorcycle"
        fuzzy: 0.2,
        // to search within a specific category
        // filter: (result) => result.category === 'fiction'
      })

      const videosById = new Map<string, VideoInfo>()
      for (const video of candidates) {
        videosById.set(video.id, video)
      }

      // walk the results (not the index) so the relevance order
      // computed by MiniSearch is the order we return
      const matches: VideoInfo[] = []
      for (const result of results) {
        const video = videosById.get(result.id)
        if (video) {
          matches.push(video)
        }
      }
      candidates = matches
    } else if (sortBy === "date") {
      candidates.sort((a, b) => b.updatedAt.localeCompare(a.updatedAt))
    } else {
      shuffleInPlace(candidates)
    }

    // true when the video carries at least one of the wanted (normalized) tags
    const hasAnyTag = (video: VideoInfo, wanted: Set<string>): boolean =>
      video.tags.some(tag => wanted.has(tag.toLowerCase().trim()))

    // filter videos by mandatory tags, or else we keep everything
    const mandatoryTagsList = normalizeTags(mandatoryTags)
    if (mandatoryTagsList.length) {
      const wanted = new Set(mandatoryTagsList)
      candidates = candidates.filter(video => hasAnyTag(video, wanted))
    }

    let videosMatchingFilters: VideoInfo[] = candidates

    // nice-to-have tags are a preference, not a hard filter:
    // matching videos come first, and the other valid videos are kept behind
    // them (in their current order) to fill the gap if we don't have enough.
    // The filler still honours the mandatory tags and the ignore list.
    const niceToHaveTagsList = normalizeTags(niceToHaveTags)
    if (niceToHaveTagsList.length) {
      const wanted = new Set(niceToHaveTagsList)
      const preferred: VideoInfo[] = []
      const filler: VideoInfo[] = []
      for (const video of candidates) {
        if (hasAnyTag(video, wanted)) {
          preferred.push(video)
        } else {
          filler.push(video)
        }
      }
      videosMatchingFilters = [...preferred, ...filler]
    }

    const sanitizedVideos = videosMatchingFilters.filter(v => !isAntisocial(v))

    // we enforce the max limit of HARD_LIMIT (eg. 100);
    // a negative maxVideos means "no videos", not "drop the last ones"
    const limit = Math.max(0, Math.min(HARD_LIMIT, maxVideos))
    const limitedNumberOfVideos = sanitizedVideos.slice(0, limit)

    // we ask for the freshest stats (kept in Redis, per the original note)
    const videosWithStats = await extendVideosWithStats(limitedNumberOfVideos)

    // stable partition: high quality first, relative order otherwise untouched
    const highQuality: typeof videosWithStats = []
    const lowQuality: typeof videosWithStats = []
    for (const video of videosWithStats) {
      if (isHighQuality(video)) {
        highQuality.push(video)
      } else {
        lowQuality.push(video)
      }
    }

    return [
      ...highQuality,
      ...lowQuality
    ]
  } catch (err) {
    if (neverThrow) {
      console.error("failed to get videos:", err)
      return []
    }

    throw err
  }
}