Rearth · Su5eD · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/build.gradle b/build.gradle
@@ -1,5 +1,5 @@
 plugins {
-    id 'dev.architectury.loom' version '1.7-SNAPSHOT' apply false
+    id 'dev.architectury.loom' version '1.13-SNAPSHOT' apply false
     id 'architectury-plugin' version '3.4-SNAPSHOT'
     id 'com.github.johnrengelman.shadow' version '8.1.1' apply false
     id 'org.moddedmc.wiki.toolkit' version '0.2.7'
@@ -91,4 +91,28 @@ subprojects {
             // retrieving dependencies.
         }
     }
+
+    if (project.name != 'common') {
+        sourceSets {
+            testmod {
+                compileClasspath += sourceSets.main.compileClasspath
+                runtimeClasspath += sourceSets.main.runtimeClasspath
+
+                java.srcDir project(':common').file('src/testmod/java')
+                resources.srcDir project(':common').file('src/testmod/resources')
+            }
+        }
+
+        loom {
+            runs {
+                testmodClient {
+                    client()
+                    ideConfigGenerated true
+                    name = "Testmod Client"
+                    source sourceSets.main
+                    source sourceSets.testmod
+                }
+            }
+        }
+    }
 }
diff --git a/common/build.gradle b/common/build.gradle
@@ -35,4 +35,11 @@ tasks.named('processResources') {
     doLast {
         println "processResources executed: wiki folder copied into assets/oracle_index"
     }
-}
+}
+
+sourceSets {
+    testmod {
+        compileClasspath += sourceSets.main.compileClasspath
+        runtimeClasspath += sourceSets.main.runtimeClasspath
+    }
+}
diff --git a/common/src/main/java/rearth/oracle/OracleClient.java b/common/src/main/java/rearth/oracle/OracleClient.java
@@ -6,9 +6,7 @@
 import net.minecraft.client.MinecraftClient;
 import net.minecraft.client.gui.screen.Screen;
 import net.minecraft.client.option.KeyBinding;
-import net.minecraft.client.resource.language.I18n;
 import net.minecraft.item.ItemStack;
-import net.minecraft.registry.Registries;
 import net.minecraft.resource.ResourceManager;
 import net.minecraft.resource.ResourceType;
 import net.minecraft.resource.SynchronousResourceReloader;
@@ -18,17 +16,17 @@
 import org.apache.logging.log4j.core.config.Configurator;
 import org.jetbrains.annotations.Nullable;
 import org.lwjgl.glfw.GLFW;
+import rearth.oracle.docs.DocsFormat;
+import rearth.oracle.docs.DocsIndexer;
+import rearth.oracle.docs.DocsMode;
 import rearth.oracle.progress.AdvancementProgressValidator;
 import rearth.oracle.ui.OracleScreen;
 import rearth.oracle.ui.SearchScreen;
-import rearth.oracle.util.MarkdownParser;
+import rearth.oracle.util.TitleLookup;
 
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
+import java.util.function.BiPredicate;
+import java.util.function.Supplier;
 
 public final class OracleClient {
 
@@ -37,11 +35,12 @@ public final class OracleClient {
     public static final KeyBinding ORACLE_WIKI = new KeyBinding("key.oracle_index.open", GLFW.GLFW_KEY_H, "key.categories.oracle");
     public static final KeyBinding ORACLE_SEARCH = new KeyBinding("key.oracle_index.search", -1, "key.categories.oracle");
 
-    public static final Set<String> LOADED_WIKIS = new HashSet<>(); // just keeps a set of loaded wiki ids
-    public static final HashMap<Identifier, ItemArticleRef> ITEM_LINKS = new HashMap<>();   // items that have a corresponding wiki page (docs or content)
-    public static final HashMap<String, Pair<String, String>> UNLOCK_CRITERIONS = new HashMap<>();  // path/key here is: "books/modid/folder/entry.mdx". Value is unlock type and content
-    public static final HashMap<String, Set<String>> AVAILABLE_MODES = new HashMap<>(); // wikiID -> Set of available modes (e.g., "oritech" -> ["docs", "content"])
-    public static final HashMap<String, Identifier> CONTENT_ID_MAP = new HashMap<>();// item / block id -> resource path (e.g., "oritech:enderic_laser" -> "oracle_index:books/oritech/.content/machines/laser.mdx")
+    public static final Map<String, DocsFormat> LOADED_WIKIS = new HashMap<>(); // map of loaded wiki ids to formats (specifies directory layout)
+    public static final Map<Identifier, ItemArticleRef> ITEM_LINKS = new HashMap<>();   // items that have a corresponding wiki page (docs or content)
+    public static final Map<String, Pair<String, String>> UNLOCK_CRITERIONS = new HashMap<>();  // path/key here is: "books/modid/folder/entry.mdx". Value is unlock type and content
+    public static final Map<String, Set<DocsMode>> AVAILABLE_MODES = new HashMap<>(); // wikiID -> Set of available modes (e.g., "oritech" -> ["docs", "content"])
+    public static final Map<String, Identifier> CONTENT_ID_MAP = new HashMap<>();// item / block id -> resource path (e.g., "oritech:enderic_laser" -> "oracle_index:books/oritech/.content/machines/laser.mdx")
+    public static final Map<String, Map<String, Identifier>> CONTENT_REF_MAP = new HashMap<>();// page ref -> resource path (e.g., "colored_cables" -> "oracle_index:books/oritech/.content/cabling/colored_cables.mdx")
 
     public static ItemStack tooltipStack;
     public static float openEntryProgress = 0;
@@ -147,75 +146,58 @@ public static boolean hasContentEntry(Identifier assetId) {
         return CONTENT_ID_MAP.containsKey(assetId.toString());
     }
 
+    public static DocsMode getDocsModeForPage(Identifier pageId) {
+        String path = pageId.getPath();
+        String modId = Objects.requireNonNull(DocsIndexer.extractModid(path), "modid must be extracted");
+        DocsFormat format = getWikiFormat(modId);
+        return format.isContentPath(path) ? DocsMode.CONTENT : DocsMode.DOCS;
+    }
+
+    public static DocsFormat getWikiFormat(String wikiId) {
+        return Objects.requireNonNull(LOADED_WIKIS.get(wikiId), "unknown wiki id");
+    }
+
+    @Nullable
+    public static Identifier getPage(String wikiId, String ref) {
+        Map<String, Identifier> refs = CONTENT_REF_MAP.get(wikiId);
+        return refs != null ? refs.get(ref) : null;
+    }
+
     private static void findAllResourceEntries(ResourceManager manager) {
-        var resources = manager.findResources(ROOT_DIR, path -> path.getPath().endsWith(".mdx"));
-
+        TitleLookup.clearCache();
+
+        DocsIndexer indexer = new DocsIndexer();
+        indexer.findAllResourceEntries(manager);
+
         LOADED_WIKIS.clear();
+        LOADED_WIKIS.putAll(indexer.getLoadedWikis());
+
         ITEM_LINKS.clear();
+        ITEM_LINKS.putAll(indexer.getItemLinks());
+
         UNLOCK_CRITERIONS.clear();
+        UNLOCK_CRITERIONS.putAll(indexer.getUnlockCriterions());
+
         CONTENT_ID_MAP.clear();
-        AVAILABLE_MODES.clear();
+        CONTENT_ID_MAP.putAll(indexer.getContentIds());
 
-        for (var entry : resources.entrySet()) {
-            var resourceId = entry.getKey();
-            var path = resourceId.getPath(); // e.g., "books/oritech/.content/machines/laser.mdx"
-
-            // extract mode + mod id
-            var segments = path.split("/");
-            if (segments.length < 2) continue;
-
-            var modId = segments[1]; // e.g., "oritech"
-            LOADED_WIKIS.add(modId);
-
-            if (path.contains(".translated")) continue; // skip / don't support translations for now in indexing
-
-            // check docs or content
-            var isContent = path.contains("/.content/");
-            var mode = isContent ? "content" : "docs";
-            AVAILABLE_MODES.computeIfAbsent(modId, k -> new HashSet<>()).add(mode);
-
-            // parse frontmatter
-            try (var inputStream = entry.getValue().getInputStream()) {
-                var fileContent = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8);
-                var frontmatter = MarkdownParser.parseFrontmatter(fileContent);
-
-                // map game id for content links
-                if (isContent && frontmatter.containsKey("id")) {
-                    var id = frontmatter.get("id").trim();
-                    CONTENT_ID_MAP.put(id, resourceId);
-                    var itemId = Identifier.of(id);
-                    var title = frontmatter.getOrDefault("title", "missing");
-                    if (title.equals("missing") && Registries.ITEM.containsId(itemId))
-                        title = I18n.translate(Registries.ITEM.get(itemId).getTranslationKey());
-                    ITEM_LINKS.put(itemId, new ItemArticleRef(resourceId, title, modId));
-                }
-
-                // frontmatter custom item links indexing
-                if (frontmatter.containsKey("related_items")) {
-                    var baseString = frontmatter.get("related_items").replace("[", "").replace("]", "").replace("\"", "");
-                    for (var itemString : baseString.split(", ")) {
-                        var itemId = Identifier.of(itemString.trim());
-                        ITEM_LINKS.put(itemId, new ItemArticleRef(resourceId, frontmatter.getOrDefault("title", "missing"), modId));
-                    }
-                }
-
-                if (frontmatter.containsKey("unlock")) {
-                    var unlockText = frontmatter.get("unlock");
-                    var parts = unlockText.split(":", 2);
-                    if (parts.length == 2) {
-                        UNLOCK_CRITERIONS.put(path, new Pair<>(parts[0], parts[1]));
-                    }
-                }
-
-            } catch (IOException e) {
-                Oracle.LOGGER.error("Unable to load book entry: {}", resourceId, e);
-            }
-        }
+        CONTENT_REF_MAP.clear();
+        CONTENT_REF_MAP.putAll(indexer.getContentRefs());
+
+        AVAILABLE_MODES.clear();
+        AVAILABLE_MODES.putAll(indexer.getAvailableModes());
     }
 
     public static SemanticSearch getOrCreateSearch() {
 
-        if (searchInstance == null) searchInstance = new SemanticSearch();
+        if (searchInstance == null) {
+            BiPredicate<String, String> filter = (modId, path) -> {
+              DocsFormat format = getWikiFormat(modId);
+              return !format.isTranslatedPath(path);
+            };
+
+            searchInstance = new SemanticSearch(filter);
+        }
 
         return searchInstance;
     }
@@ -230,7 +212,9 @@ public static Optional<Identifier> getTranslatedPath(Identifier identifier, Stri
         var resourceManager = MinecraftClient.getInstance().getResourceManager();
 
         if (!languageCode.startsWith("en_")) {
-            var translatedPath = Identifier.of(identifier.getNamespace(), identifier.getPath().replace(ROOT_DIR + "/" + wikiId, ROOT_DIR + "/" + wikiId + "/.translated/" + languageCode));
+            var format = getWikiFormat(wikiId);
+            var translatedDir = format.getTranslatedDir(languageCode);
+            var translatedPath = Identifier.of(identifier.getNamespace(), identifier.getPath().replace(ROOT_DIR + "/" + wikiId, ROOT_DIR + "/" + wikiId + translatedDir));
 
             if (resourceManager.getResource(translatedPath).isPresent()) {
                 return Optional.of(translatedPath);
@@ -241,7 +225,7 @@ public static Optional<Identifier> getTranslatedPath(Identifier identifier, Stri
         return Optional.empty();
     }
 
-    public record ItemArticleRef(Identifier linkTarget, String entryName, String wikiId) {
+    public record ItemArticleRef(Identifier linkTarget, Supplier<String> entryName, String wikiId, int pageIDs) {
     }
 
 }
diff --git a/common/src/main/java/rearth/oracle/SemanticSearch.java b/common/src/main/java/rearth/oracle/SemanticSearch.java
@@ -12,6 +12,7 @@
 import net.minecraft.client.MinecraftClient;
 import net.minecraft.util.Identifier;
 import rearth.oracle.util.MarkdownParser;
+import rearth.oracle.util.TitleLookup;
 
 import java.io.IOException;
 import java.io.InvalidObjectException;
@@ -21,6 +22,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.BiPredicate;
 
 import static rearth.oracle.OracleClient.ROOT_DIR;
 
@@ -33,7 +35,7 @@ public class SemanticSearch {
     public static AtomicBoolean EMBEDDING_ERRORED = new AtomicBoolean(false);
     public static AtomicBoolean FINISHED = new AtomicBoolean(false);
 
-    public SemanticSearch() {
+    public SemanticSearch(BiPredicate<String, String> filter) {
 
         // do this in background to avoid freezing main
         new Thread(() -> {
@@ -73,18 +75,23 @@ public SemanticSearch() {
                     var entryFileName = segments[segments.length - 1]; // e.g. "wrench.mdx"
                     var entryDirectory = entryPath.replace(entryFileName, ""); // e.g. "tools" or "processing/reactor"
 
-                    if (entryDirectory.contains(".translated")) continue; // skip / don't support translations for now
+                    if (!filter.test(modId, entryDirectory)) continue; // skip / don't support translations for now
 
                     try {
                         var fileContent = new String(resources.get(resourceId).getInputStream().readAllBytes(), StandardCharsets.UTF_8);
-                        var fileComponents = MarkdownParser.parseFrontmatter(fileContent);
-
+                        var frontmatter = MarkdownParser.parseFrontmatter(fileContent);
+                        var title = MarkdownParser.parseHeadingTitle(fileContent);
+
                         // generate embeddings
-                        this.queueEmbeddingsJob(modId, entryDirectory, entryFileName, fileComponents, fileContent);
+                        var fileComponents = new HashMap<String, String>();
+                        frontmatter.map().forEach((k, v) -> {
+                            if (v.size() == 1) fileComponents.put(k, v.getFirst());
+                        });
+                        this.queueEmbeddingsJob(modId, entryDirectory, entryFileName, fileComponents, fileContent, title);
 
 
                     } catch (IOException e) {
-                        Oracle.LOGGER.error("Unable to load book with id: " + resourceId);
+                        Oracle.LOGGER.error("Unable to load book with id: {}", resourceId);
                         throw new RuntimeException(e);
                     }
                 }
@@ -127,12 +134,7 @@ public ArrayList<SearchResult> search(String query) {
             var id = match.embedded().metadata().getString("wiki") + ":" + match.embedded().metadata().getString("category") + match.embedded().metadata().getString("fileName");
             var title = match.embedded().metadata().getString("title");
             if (title == null) {
-                var frontmatter = new HashMap<String, String>();
-                for (var data : match.embedded().metadata().toMap().entrySet()) {
-                    if (data.getValue() instanceof String value)
-                        frontmatter.put(data.getKey(), value);
-                }
-                title = MarkdownParser.getTitle(frontmatter, Identifier.of(id));
+                title = TitleLookup.getTitle(Identifier.of(id));
             }
 
             // check if id already exists, add it to alt texts
@@ -154,12 +156,13 @@ public ArrayList<SearchResult> search(String query) {
 
     }
 
-    public void queueEmbeddingsJob(String wikiId, String filePath, String fileName, Map<String, String> frontmatter, String content) {
+    public void queueEmbeddingsJob(String wikiId, String filePath, String fileName, Map<String, String> frontmatter, String content, String title) {
 
         var document = Document.from(content, Metadata.from(frontmatter));
         document.metadata().put("fileName", fileName);
         document.metadata().put("category", filePath);
         document.metadata().put("wiki", wikiId);
+        if (title != null) document.metadata().put("title", title);
         ingestor.ingest(document);
     }
 

diff --git a/common/src/main/java/rearth/oracle/docs/DocsFormat.java b/common/src/main/java/rearth/oracle/docs/DocsFormat.java
@@ -0,0 +1,17 @@
+package rearth.oracle.docs;
+
+public interface DocsFormat {
+    boolean isContentPath(String path);
+
+    boolean isTranslatedPath(String path);
+
+    String getDocsRoot(DocsMode mode);
+
+    String getTranslatedDir(String locale);
+
+    String getAssetsRoot();
+
+    String getDocsPagePath(String slug);
+
+    String stripContentPrefix(String path);
+}