@@ -14,9 +14,8 @@ const (
1414
1515// OptimizeListConfig controls the optimization pipeline behavior.
1616type OptimizeListConfig struct {
17- maxDepth int
18- preservedFields map [string ]bool
19- collectionExtractors map [string ][]string
17+ maxDepth int
18+ preservedFields map [string ]bool
2019}
2120
2221type OptimizeListOption func (* OptimizeListConfig )
@@ -41,20 +40,8 @@ func WithPreservedFields(fields ...string) OptimizeListOption {
4140 }
4241}
4342
44- // WithCollectionExtractors controls how array fields are handled instead of being summarized as "[N items]".
45- // - 1 sub-field: comma-joined into a flat string ("bug, enhancement").
46- // - Multiple sub-fields: keep the array, but trim each element to only those fields.
47- //
48- // These are explicitly exempt from fill-rate filtering; if we asked for the extraction, it's likely important
49- // to preserve the data even if only one item has it.
50- func WithCollectionExtractors (extractors map [string ][]string ) OptimizeListOption {
51- return func (c * OptimizeListConfig ) {
52- c .collectionExtractors = extractors
53- }
54- }
55-
5643// OptimizeList optimizes a list of items by applying flattening, URL removal, zero-value removal,
57- // whitespace normalization, collection summarization, and fill-rate filtering.
44+ // whitespace normalization, and fill-rate filtering.
5845func OptimizeList [T any ](items []T , opts ... OptimizeListOption ) ([]byte , error ) {
5946 cfg := OptimizeListConfig {maxDepth : defaultMaxDepth }
6047 for _ , opt := range opts {
@@ -106,7 +93,7 @@ func flattenInto(item map[string]any, prefix string, result map[string]any, dept
10693}
10794
10895// filterByFillRate drops keys that appear on less than the threshold proportion of items.
109- // Preserved fields and extractor keys always survive.
96+ // Preserved fields always survive.
11097func filterByFillRate (items []map [string ]any , threshold float64 , cfg OptimizeListConfig ) []map [string ]any {
11198 keyCounts := make (map [string ]int )
11299 for _ , item := range items {
@@ -118,8 +105,7 @@ func filterByFillRate(items []map[string]any, threshold float64, cfg OptimizeLis
118105 minCount := int (threshold * float64 (len (items )))
119106 keepKeys := make (map [string ]bool , len (keyCounts ))
120107 for key , count := range keyCounts {
121- _ , hasExtractor := cfg .collectionExtractors [key ]
122- if count > minCount || cfg .preservedFields [key ] || hasExtractor {
108+ if count > minCount || cfg .preservedFields [key ] {
123109 keepKeys [key ] = true
124110 }
125111 }
@@ -138,7 +124,7 @@ func filterByFillRate(items []map[string]any, threshold float64, cfg OptimizeLis
138124}
139125
140126// optimizeItem applies per-item strategies in a single pass: remove URLs,
141- // remove zero-values, normalize whitespace, summarize collections .
127+ // remove zero-values, normalize whitespace.
142128// Preserved fields skip everything except whitespace normalization.
143129func optimizeItem (item map [string ]any , cfg OptimizeListConfig ) map [string ]any {
144130 result := make (map [string ]any , len (item ))
@@ -151,86 +137,16 @@ func optimizeItem(item map[string]any, cfg OptimizeListConfig) map[string]any {
151137 continue
152138 }
153139
154- switch v := value .(type ) {
155- case string :
156- result [key ] = strings .Join (strings .Fields (v ), " " )
157- case []any :
158- if len (v ) == 0 {
159- continue
160- }
161-
162- if preserved {
163- result [key ] = value
164- } else if fields , ok := cfg .collectionExtractors [key ]; ok {
165- if len (fields ) == 1 {
166- result [key ] = extractSubField (v , fields [0 ])
167- } else {
168- result [key ] = trimArrayFields (v , fields )
169- }
170- } else {
171- result [key ] = fmt .Sprintf ("[%d items]" , len (v ))
172- }
173- default :
140+ if s , ok := value .(string ); ok {
141+ result [key ] = strings .Join (strings .Fields (s ), " " )
142+ } else {
174143 result [key ] = value
175144 }
176145 }
177146
178147 return result
179148}
180149
// extractSubField pulls the value stored under field from each element of
// items and joins the collected values with ", ".
//
// Elements that are not a map[string]any, that lack the field, or whose value
// is nil are silently skipped. String values are used verbatim, except that
// empty strings are skipped; any other value is rendered with fmt's default
// formatting. The result is "" when nothing is collected.
func extractSubField(items []any, field string) string {
	// Each element contributes at most one value, so len(items) bounds the result.
	vals := make([]string, 0, len(items))
	for _, item := range items {
		m, ok := item.(map[string]any)
		if !ok {
			continue
		}

		v, ok := m[field]
		if !ok || v == nil {
			continue
		}

		if s, isString := v.(string); isString {
			// Empty strings carry no information in the joined output.
			if s != "" {
				vals = append(vals, s)
			}
			continue
		}

		vals = append(vals, fmt.Sprint(v))
	}

	return strings.Join(vals, ", ")
}
208-
// trimArrayFields keeps only the specified fields from each object in items.
//
// Every element that is a map[string]any is replaced by a new map holding just
// the keys listed in fields that are present on it (a key present with a nil
// value counts as present). Elements that are not maps, and elements that have
// none of the requested fields, are dropped. The kept values are copied as is
// (shallow); no further strategies are applied to them. The input maps are not
// modified, and the returned slice is always non-nil.
func trimArrayFields(items []any, fields []string) []any {
	result := make([]any, 0, len(items))
	for _, item := range items {
		m, ok := item.(map[string]any)
		if !ok || len(m) == 0 {
			// Non-objects are dropped; an empty object cannot match any field.
			continue
		}

		trimmed := make(map[string]any, len(fields))
		for _, f := range fields {
			if v, exists := m[f]; exists {
				trimmed[f] = v
			}
		}

		// Drop objects that had none of the requested fields.
		if len(trimmed) > 0 {
			result = append(result, trimmed)
		}
	}

	return result
}
233-
234150// isURLKey matches "url", "*_url", and their dot-prefixed variants.
235151func isURLKey (key string ) bool {
236152 if idx := strings .LastIndex (key , "." ); idx >= 0 {
0 commit comments