1+ /*
2+ * Copyright (c) 2017-2026 Ronald Brill
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ * https://www.apache.org/licenses/LICENSE-2.0
8+ *
9+ * Unless required by applicable law or agreed to in writing, software
10+ * distributed under the License is distributed on an "AS IS" BASIS,
11+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ * See the License for the specific language governing permissions and
13+ * limitations under the License.
14+ */
15+ package org .htmlunit .cyberneko .util ;
16+
17+ import java .util .Arrays ;
18+ import java .util .HashMap ;
19+
/**
 * A cache that interns strings from {@code char[]} buffer regions.
 * <p>
 * On cache hits, the same {@code String} instance is returned,
 * avoiding repeated allocation for frequently occurring names
 * (e.g., HTML tag names and attribute names).
 *
 * <p>The lookup key points directly into the caller's buffer
 * (zero-copy), and only when a new entry is added does the key
 * data get copied into an independent array.
 *
 * <p>Entries are never evicted, so the cache grows with the number of
 * distinct character sequences requested. Instances are not thread-safe:
 * every lookup mutates one shared key object and an unsynchronized
 * {@link HashMap}.
 *
 * @author Ronald Brill
 * @since 5.0.0
 */
public class StringCache {
    // HTML has ~100 distinct tag names + ~50 common attribute names
    // At 0.75 load factor, capacity 256 avoids some rehash
    private final HashMap<CharBufferKey, String> cache_ = new HashMap<>(256);

    // Reusable probe key; it is re-pointed at the caller's buffer on every get()
    // so that a cache hit allocates nothing.
    private final CharBufferKey lookupKey_ = new CharBufferKey();

    /**
     * Returns a cached {@code String} for the given char buffer region.
     * If no cached entry exists, a new {@code String} is created, cached, and returned.
     *
     * @param ch the character array (may be a shared/reused buffer)
     * @param offset the start offset of the name in {@code ch}
     * @param length the number of characters
     * @return the cached string
     * @throws NullPointerException if {@code ch} is {@code null}
     * @throws IndexOutOfBoundsException if the region does not lie within {@code ch}
     */
    public String get(final char[] ch, final int offset, final int length) {
        lookupKey_.update(ch, offset, length);
        String val = cache_.get(lookupKey_);

        if (val == null) {
            val = new String(ch, offset, length);
            // The probe key aliases the caller's buffer, so only a detached
            // copy may be stored in the map.
            cache_.put(lookupKey_.detach(), val);
        }

        return val;
    }

    /**
     * A lightweight key that wraps a region of a {@code char[]} for use
     * as a {@link HashMap} lookup key. The {@link #update} method points
     * the key at a caller-owned buffer (zero-copy); {@link #detach}
     * creates an independent copy suitable for long-term storage in the map.
     *
     * <p>The key is {@link Comparable} so that {@link HashMap} can order keys
     * whose hash codes collide instead of scanning them one by one. This matters
     * because the hash is the same simple polynomial used by {@code String},
     * for which colliding inputs are easy to construct.
     */
    static final class CharBufferKey implements Comparable<CharBufferKey> {
        /** Backing array of a key that has not been pointed at a buffer yet. */
        private static final char[] NO_CHARS = new char[0];

        // Never null, so a fresh key behaves like a key for the empty sequence.
        private char[] data_ = NO_CHARS;
        private int offset_;
        private int length_;
        private int hash_;

        /**
         * Points this key at a region of an external char array.
         * No copy is made; the caller must not mutate the region
         * while this key is used for a lookup.
         *
         * @param ch the character array
         * @param offset the start offset
         * @param length the number of characters
         */
        void update(final char[] ch, final int offset, final int length) {
            data_ = ch;
            offset_ = offset;
            length_ = length;

            // Same polynomial as String.hashCode(): h = 31 * h + c
            int h = 0;
            for (int i = offset; i < offset + length; i++) {
                h = ((h << 5) - h) + ch[i];
            }
            hash_ = h;
        }

        /**
         * Creates an independent copy of this key whose data is
         * not shared with any external buffer. The copy is suitable
         * for storing as a long-lived map key.
         *
         * @return a detached copy of this key
         */
        CharBufferKey detach() {
            final CharBufferKey detached = new CharBufferKey();
            detached.data_ = new char[length_];
            System.arraycopy(data_, offset_, detached.data_, 0, length_);
            detached.offset_ = 0;
            detached.length_ = length_;
            // The content is identical, so the precomputed hash can be reused.
            detached.hash_ = hash_;
            return detached;
        }

        @Override
        public int hashCode() {
            return hash_;
        }

        @Override
        public boolean equals(final Object o) {
            if (o == this) {
                return true;
            }
            if (o instanceof CharBufferKey ob) {
                if (ob.length_ != length_) {
                    return false;
                }
                return Arrays.mismatch(
                        data_, offset_, offset_ + length_,
                        ob.data_, ob.offset_, ob.offset_ + ob.length_) < 0;
            }
            return false;
        }

        /**
         * Orders keys by length first and by character content second.
         * The result is {@code 0} exactly when {@link #equals} is {@code true}.
         * Lengths are compared first so that the region comparison only ever
         * runs on two regions of the same length.
         *
         * @param other the key to compare with
         * @return a negative value, zero, or a positive value if this key sorts
         *         before, equal to, or after {@code other}
         */
        @Override
        public int compareTo(final CharBufferKey other) {
            if (length_ != other.length_) {
                return Integer.compare(length_, other.length_);
            }
            return Arrays.compare(
                    data_, offset_, offset_ + length_,
                    other.data_, other.offset_, other.offset_ + other.length_);
        }

        @Override
        public String toString() {
            return new String(data_, offset_, length_);
        }
    }
}
0 commit comments