Skip to content

Commit cda4993

Browse files
committed
Adds support for add_symbols and set_symbols in BytecodeIonReader
1 parent b574fdb commit cda4993

8 files changed

Lines changed: 1185 additions & 35 deletions

File tree

src/main/java/com/amazon/ion/bytecode/BytecodeIonReader.kt

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import com.amazon.ion.IonType
1010
import com.amazon.ion.SymbolTable
1111
import com.amazon.ion.SymbolToken
1212
import com.amazon.ion.Timestamp
13+
import com.amazon.ion.UnknownSymbolException
1314
import com.amazon.ion.bytecode.BytecodeIonReader.AnnotationHelper.EMPTY_ANNOTATIONS
1415
import com.amazon.ion.bytecode.ir.Debugger
1516
import com.amazon.ion.bytecode.ir.Instructions
@@ -118,12 +119,7 @@ internal class BytecodeIonReader(private var generator: BytecodeGenerator) : Ion
118119

119120
do {
120121
// Move `i` to point to the next instruction.
121-
val length = Instructions.getData(instruction)
122-
val operandCountBits = Instructions.getOperandCountBits(instruction)
123-
// equivalent to `i += if (operandsToSkip == 3) length else operandsToSkip`
124-
// `useOperandCount` is all zeros if `operandsToSkip` is 3, and all ones if `operandsToCount` is smaller than 3.
125-
val useOperandCount = ((operandCountBits - 3) shr 2)
126-
i += (operandCountBits and useOperandCount) or (length and useOperandCount.inv())
122+
i += Instructions.getOperandCount(instruction)
127123

128124
// Load the next instruction
129125
instruction = bytecode[i++]
@@ -202,8 +198,33 @@ internal class BytecodeIonReader(private var generator: BytecodeGenerator) : Ion
202198
context.reset()
203199
}
204200

205-
private fun handleSystemValue(instruction: Int, nextI: Int): Int {
206-
TODO("Implement directive handler")
201+
private fun handleSystemValue(instruction: Int, position: Int): Int {
202+
val op = Instructions.toOperation(instruction)
203+
this.instruction = INSTRUCTION_NOT_SET
204+
bytecodeI = position
205+
206+
when (op) {
207+
Operation.OP_DIRECTIVE_SET_SYMBOLS -> context.readSetSymbolsDirective(this)
208+
Operation.OP_DIRECTIVE_ADD_SYMBOLS -> context.readAddSymbols(this)
209+
Operation.OP_DIRECTIVE_SET_MACROS -> context.readSetMacrosDirective(this)
210+
Operation.OP_DIRECTIVE_ADD_MACROS -> context.readAddMacrosDirective(this)
211+
212+
Operation.OP_DIRECTIVE_USE -> context.readUseDirective(this)
213+
Operation.OP_DIRECTIVE_IMPORT -> context.readImportDirective(this)
214+
Operation.OP_DIRECTIVE_ENCODING -> context.readEncodingDirective(this)
215+
Operation.OP_DIRECTIVE_MODULE -> context.readModuleDirective(this)
216+
217+
else -> TODO()
218+
}
219+
// Ensure that we are positioned on/after the END_CONTAINER instruction.
220+
bytecodeI += Instructions.getOperandCount(this.instruction) + 1
221+
// Clear the current instruction, so that we can advance past the directive's CONTAINER_END
222+
this.instruction = INSTRUCTION_NOT_SET
223+
224+
// This is required after any directive other than ADD/SET macros, so we'll just do this in all cases since it's a cheap operation.
225+
symbolTable = context.getEffectiveSymbolTable()
226+
227+
return bytecodeI
207228
}
208229

209230
override fun getType(): IonType? = OperationKind.ionTypeOf(Operation.toOperationKind(Instructions.toOperation(instruction)))
@@ -544,7 +565,10 @@ internal class BytecodeIonReader(private var generator: BytecodeGenerator) : Ion
544565
Operation.OP_SYMBOL_REF,
545566
Operation.OP_STRING_REF -> generator.readTextReference(position = bytecode[i], length = data)
546567
Operation.OP_SYMBOL_CHAR -> data.toChar().toString()
547-
Operation.OP_SYMBOL_SID -> symbolTable[data]
568+
Operation.OP_SYMBOL_SID -> {
569+
// TODO: Check the size of the actual symbol table (not the `symbolTable` array, which may be over-allocated)
570+
symbolTable[data]
571+
}
548572
else -> throw IonException("Not positioned on a string or symbol value")
549573
}
550574
}
@@ -564,8 +588,7 @@ internal class BytecodeIonReader(private var generator: BytecodeGenerator) : Ion
564588
}
565589
}
566590

567-
// TODO: don't return null
568-
override fun getSymbolTable(): SymbolTable? = null
591+
override fun getSymbolTable(): SymbolTable = context.getLstSnapshot()
569592

570593
override fun byteSize(): Int {
571594
val instruction = this.instruction

src/main/java/com/amazon/ion/bytecode/EncodingContextManager.kt

Lines changed: 102 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,29 @@
22
// SPDX-License-Identifier: Apache-2.0
33
package com.amazon.ion.bytecode
44

5+
import com.amazon.ion.IonException
6+
import com.amazon.ion.IonType
7+
import com.amazon.ion.SystemSymbols
58
import com.amazon.ion.bytecode.util.BytecodeBuffer
69
import com.amazon.ion.bytecode.util.ConstantPool
10+
import com.amazon.ion.bytecode.util.StringPool
11+
import com.amazon.ion.impl.ArrayBackedLstSnapshot
712
import com.amazon.ion.ion_1_1.MacroImpl
813
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
914

1015
/**
1116
* TODO:
1217
* Write more documentation.
13-
* Implement stubbed out methods.
18+
* Implement remaining stubbed out methods.
19+
* Do we need some way to "garbage collect" from the constant pool?
1420
*
1521
* Notes:
1622
*
23+
* Terminology:
24+
* - "effective" symbol/macro table -- the tables that are currently in use and exposed to the reader
25+
* - "spare" symbol/macro table -- essentially we have an object pool with size 1, allowing us to modify the inactive
26+
* tables and then swap them for the active tables once the changes are complete.
27+
*
1728
* It is never safe to remove or modify any existing data in the effective tables. It is safe to append data to those
1829
* tables for an `add_symbols`, `add_macros`, or `use` directive (as long as the active encoding modules are just `$ion` and `_`).
1930
*/
@@ -34,14 +45,21 @@ internal class EncodingContextManager {
3445
)
3546
}
3647

37-
// These make up the effective macro table and effective symbol table
38-
private var macroBytecode = BytecodeBuffer()
39-
private var macroOffsets = BytecodeBuffer()
40-
private var macroNames = ConstantPool()
41-
private var symbols = mutableListOf<String?>().apply { SYSTEM_SYMBOLS.forEach { add(it) } }
42-
43-
// TODO: Do we need the constant pool here?
44-
private var constants = ConstantPool()
48+
class TableSet {
49+
@JvmField var macroBytecode = BytecodeBuffer()
50+
@JvmField var macroOffsets = BytecodeBuffer()
51+
@JvmField var macroNames = ConstantPool()
52+
@JvmField var symbols = StringPool().apply { SYSTEM_SYMBOLS.forEach { add(it) } }
53+
@JvmField var constants = ConstantPool()
54+
55+
fun reset() {
56+
macroBytecode.clear()
57+
macroOffsets.clear()
58+
macroNames.clear()
59+
symbols.truncate(SystemSymbols.ION_1_0_MAX_ID + 1)
60+
constants.clear()
61+
}
62+
}
4563

4664
private class Module(
4765
val symbols: Array<String>,
@@ -54,34 +72,59 @@ internal class EncodingContextManager {
5472
// Tracks only modules _other_ than the system module and default module
5573
private var additionalActiveModules = mutableListOf<Module>()
5674

57-
@SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
58-
fun getEffectiveMacroTableBytecode(): IntArray = macroBytecode.unsafeGetArray()
75+
private val effectiveTables = TableSet()
76+
// TODO(simplification): we might not need the spare tables for macros because
77+
// macros are already evaluated before we get to this point.
78+
private val spareTables = TableSet()
5979

6080
@SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
61-
fun getEffectiveMacroTableOffsets(): IntArray = macroOffsets.unsafeGetArray()
62-
63-
fun getEffectiveSymbolTable(): Array<String?> = symbols.toTypedArray()
64-
81+
fun getEffectiveMacroTableBytecode(): IntArray = effectiveTables.macroBytecode.unsafeGetArray()
82+
@SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
83+
fun getEffectiveMacroTableOffsets(): IntArray = effectiveTables.macroOffsets.unsafeGetArray()
84+
@SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
85+
fun getEffectiveSymbolTable(): Array<String?> = effectiveTables.symbols.unsafeGetArray()
6586
@SuppressFBWarnings("IE_EXPOSE_REP", justification = "array is accessible for performance")
66-
fun getEffectiveConstantPool(): Array<Any?> = constants.unsafeGetArray()
87+
fun getEffectiveConstantPool(): Array<Any?> = effectiveTables.constants.unsafeGetArray()
88+
89+
fun getLstSnapshot() = ArrayBackedLstSnapshot(effectiveTables.symbols)
6790

6891
/** Called when encountering an IVM */
6992
fun reset() {
7093
additionalActiveModules.clear()
7194
additionalAvailableModules.clear()
72-
macroBytecode.clear()
73-
macroOffsets.clear()
74-
macroNames.clear()
75-
symbols.clear()
76-
SYSTEM_SYMBOLS.forEach { symbols.add(it) }
77-
constants.clear()
95+
effectiveTables.reset()
7896
}
7997

8098
/**
8199
* The [BytecodeIonReader] should be positioned in the directive, but not on the first value yet.
82100
* When this method returns, the [BytecodeIonReader] will be positioned at the end of the directive, but not stepped out.
83101
*/
84102
fun readSetSymbolsDirective(reader: BytecodeIonReader) {
103+
if (additionalActiveModules.isNotEmpty()) {
104+
readSetSymbolsWithActiveModules(reader)
105+
return
106+
}
107+
108+
val symbols = spareTables.symbols
109+
symbols.truncate(SystemSymbols.ION_1_0_MAX_ID + 1)
110+
while (true) {
111+
val s = when (reader.next()) {
112+
IonType.SYMBOL,
113+
IonType.STRING -> reader.stringValue()
114+
null -> break
115+
else -> throw IonException("Expected text; found ${reader.type}")
116+
}
117+
symbols.add(s)
118+
}
119+
120+
// Swap the effective and spare symbol tables
121+
spareTables.symbols = effectiveTables.symbols
122+
effectiveTables.symbols = symbols
123+
}
124+
125+
private fun readSetSymbolsWithActiveModules(reader: BytecodeIonReader) {
126+
// Update the default module then call:
127+
// rebuildEffectiveSymbolTable()
85128
TODO()
86129
}
87130

@@ -90,6 +133,26 @@ internal class EncodingContextManager {
90133
* When this method returns, the [BytecodeIonReader] will be positioned at the end of the directive, but not stepped out.
91134
*/
92135
fun readAddSymbols(reader: BytecodeIonReader) {
136+
if (additionalActiveModules.isNotEmpty()) {
137+
readAddSymbolsWithActiveModules(reader)
138+
return
139+
}
140+
141+
val symbols = effectiveTables.symbols
142+
while (true) {
143+
val s = when (reader.next()) {
144+
IonType.SYMBOL,
145+
IonType.STRING -> reader.stringValue()
146+
null -> break
147+
else -> throw IonException("Expected text; found ${reader.type}")
148+
}
149+
symbols.add(s)
150+
}
151+
}
152+
153+
private fun readAddSymbolsWithActiveModules(reader: BytecodeIonReader) {
154+
// Update the default module then call:
155+
// rebuildEffectiveSymbolTable()
93156
TODO()
94157
}
95158

@@ -142,4 +205,21 @@ internal class EncodingContextManager {
142205
fun readEncodingDirective(reader: BytecodeIonReader) {
143206
TODO()
144207
}
208+
209+
/**
210+
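* Rebuilds the effective symbol table: truncates the spare symbol table with `truncate(SystemSymbols.ION_1_0_MAX_ID + 1)`, appends the symbols of every additional active encoding module, and then swaps the spare and effective symbol tables.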
211+
*/
212+
private fun rebuildEffectiveSymbolTable() {
213+
val newEffectiveSymbolTable = spareTables.symbols
214+
newEffectiveSymbolTable.truncate(SystemSymbols.ION_1_0_MAX_ID + 1)
215+
// TODO: Make this more efficient with an array copy operation.
216+
additionalActiveModules.forEach { m -> m.symbols.forEach { newEffectiveSymbolTable.add(it) } }
217+
218+
spareTables.symbols = effectiveTables.symbols
219+
effectiveTables.symbols = newEffectiveSymbolTable
220+
}
221+
222+
private fun rebuildEffectiveMacroTable() {
223+
TODO()
224+
}
145225
}

src/main/java/com/amazon/ion/bytecode/bin10/ByteArrayBytecodeGenerator10.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ import kotlin.math.min
3131
/**
3232
* A bytecode generator for Ion 1.0 binary encoding.
3333
*
34+
* TODO: Consider _not_ stopping the refill when an Ion 1.0 IVM or Ion 1.0 symbol table is encountered, since it does
35+
* not affect the correctness. Only things that change the macro table require the refill to stop.
36+
*
3437
* #### Note on integer values
3538
*
3639
* Because there are separate positive/negative opcodes for integers in Ion 1.0, the data referenced in an INT_REF

src/main/java/com/amazon/ion/bytecode/ir/Instructions.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,22 @@ internal object Instructions {
4848
@JvmStatic
4949
fun getData(instruction: Int) = instruction and DATA_MASK
5050

51+
/**
52+
* Determines the number of operands based on the operand count bits and the `length` in the data bits.
53+
*
54+
* @param instruction The packed instruction integer
55+
* @return The number of operands for this instruction.
56+
*/
57+
@JvmStatic
58+
fun getOperandCount(instruction: Int): Int {
59+
val maybeLength = getData(instruction)
60+
val operandCountBits = getOperandCountBits(instruction)
61+
// Branch-free equivalent of `if (operandCountBits == 3) maybeLength else operandCountBits`.
62+
// `useOperandCount` is all zeros if `operandCountBits` is 3, and all ones if `operandCountBits` is smaller than 3.
63+
val useOperandCount = ((operandCountBits - 3) shr 2)
64+
return (operandCountBits and useOperandCount) or (maybeLength and useOperandCount.inv())
65+
}
66+
5167
/**
5268
* Packs a data value with an instruction to create a packed instruction.
5369
*/

0 commit comments

Comments
 (0)