<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.7.23">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="dcterms.date" content="2025-05-05">
<title>Case Study: Local LLM-Based NER with n8n and Ollama – Nicole Dresselhaus</title>
</head>

<body class="quarto-light">

<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Case Study: Local LLM-Based NER with n8n and Ollama</h1>
<div class="quarto-categories">
<div class="quarto-category">Article</div>
<div class="quarto-category">Case-study</div>
<div class="quarto-category">ML</div>
<div class="quarto-category">NER</div>
</div>
</div>
<div class="quarto-title-meta-author">
<div class="quarto-title-meta-heading">Authors</div>
<div class="quarto-title-meta-heading">Affiliations</div>
<div class="quarto-title-meta-contents">
<p class="author"><a href="https://chatgpt.com">GPT-4.5</a></p>
</div>
<div class="quarto-title-meta-contents">
<p class="affiliation">
<a href="https://openai.com">
OpenAI
</a>
</p>
</div>
<div class="quarto-title-meta-contents">
<p class="author"><a href="https://www.deepcogito.com/research/cogito-v1-preview">cogito-v1-preview</a></p>
</div>
<div class="quarto-title-meta-contents">
<p class="affiliation">
<a href="https://www.deepcogito.com">
DeepCogito
</a>
</p>
</div>
<div class="quarto-title-meta-contents">
<p class="author">Nicole Dresselhaus <a href="https://orcid.org/0009-0008-8850-3679" class="quarto-title-author-orcid">(ORCID)</a></p>
</div>
<div class="quarto-title-meta-contents">
<p class="affiliation">
<a href="https://hu-berlin.de">
Humboldt-Universität zu Berlin
</a>
</p>
</div>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">May 5, 2025</p>
</div>
</div>
</div>
<div>
<div class="abstract">
<div class="block-title">Abstract</div>
<p>Named Entity Recognition (NER) is a foundational task in text analysis, traditionally addressed by training NLP models on annotated data. However, a recent study – <em>“NER4All or Context is All You Need”</em> – showed that out-of-the-box Large Language Models (LLMs) can <strong>significantly outperform</strong> classical NER pipelines (e.g. spaCy, Flair) on historical texts by using clever prompting, without any model retraining. This case study demonstrates how to implement the paper’s method using entirely local infrastructure: an <strong>n8n</strong> automation workflow (for orchestration) and an <strong>Ollama</strong> server running a 14B-parameter LLM on an NVIDIA A100 GPU. The goal is to enable research engineers and tech-savvy historians to <strong>reproduce and apply this method easily</strong> on their own data, with a focus on usability and correct outputs rather than raw performance.</p>
<p>We will walk through the end-to-end solution – from accepting a webhook input that defines entity types (e.g. Person, Organization, Location) to prompting a local LLM to extract those entities from a text. The solution covers setup instructions, required infrastructure (GPU, memory, software), model configuration, and workflow design in n8n. We also discuss potential limitations (like model accuracy and context length) and how to address them. By the end, you will have a clear blueprint for a <strong>self-hosted NER pipeline</strong> that leverages the knowledge encoded in LLMs (as advocated by the paper) while maintaining data privacy and reproducibility.</p>
</div>
</div>
</header>
<section id="background-llm-based-ner-method-overview" class="level2">
<h2 class="anchored" data-anchor-id="background-llm-based-ner-method-overview">Background: LLM-Based NER Method Overview</h2>
<p>The referenced study introduced a prompt-driven approach to NER, reframing it “from a purely linguistic task into a humanities-focused task”. Instead of training a specialized NER model for each corpus, the method leverages the fact that large pretrained LLMs already contain vast world knowledge and language understanding. The key idea is to <strong>provide the model with contextual definitions and instructions</strong> so it can recognize entities in context. Notably, the authors found that with proper prompts, a commercial LLM (ChatGPT-4) could achieve <strong>precision and recall on par with or better than</strong> state-of-the-art NER tools on a 1921 historical travel guide. This was achieved <strong>zero-shot</strong>, i.e. without any fine-tuning or additional training data beyond the prompt itself.</p>
<p><strong>Prompt Strategy:</strong> The success of this approach hinges on careful prompt engineering. The final prompt used in the paper had multiple components (a minimal sketch of such a prompt follows the list):</p>
<ul>
<li><strong>Persona &amp; Context:</strong> A brief introduction framing the LLM as an <em>expert</em> reading a historical text, possibly including domain context (e.g. “This text is an early 20th-century travel guide; language is old-fashioned”). This primes the model with relevant background.</li>
<li><strong>Task Instructions:</strong> A clear description of the NER task, including the list of entity categories and how to mark them in text. For example: <em>“Identify all Person (PER), Location (LOC), and Organization (ORG) names in the text and mark each by enclosing it in tags.”</em></li>
<li><strong>Optional Examples:</strong> A few examples of sentences with correct tagged output (few-shot learning) to guide the model. Interestingly, the study found that zero-shot prompting often <strong>outperformed few-shot</strong> until ~16 examples were provided. Given the cost of preparing examples and limited prompt length, our implementation will focus on zero-shot usage for simplicity.</li>
<li><strong>Reiteration &amp; Emphasis:</strong> The prompt repeated key instructions in different words and emphasized compliance (e.g. <em>“Make sure you follow the tagging format exactly for every example.”</em>). This redundancy helps the model adhere to instructions.</li>
<li><strong>Prompt Engineering Tricks:</strong> They included creative cues to improve accuracy, such as offering a “monetary reward for each correct classification” and the phrase <em>“Take a deep breath and think step by step.”</em> These tricks, drawn from prior work, encouraged the model to be thorough and careful.</li>
<li><strong>Output Format:</strong> Crucially, the model was asked to <strong>repeat the original text exactly</strong> but insert tags around entity mentions. The authors settled on a format like <code><<PER ... /PER>></code> to tag people, <code><<LOC ... /LOC>></code> for locations, etc., covering each full entity span. This inline tagging format leveraged the model’s familiarity with XML/HTML syntax (from its training data) and largely eliminated problems like unclosed tags or extra spaces. By instructing the model <em>not to alter any other text</em>, they ensured the output could be easily compared to the input and parsed for entities.</li>
</ul>
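<p>To make these components concrete, here is a minimal sketch of how such a prompt could be assembled, e.g. inside an n8n Function node (JavaScript). The wording is illustrative, not the paper’s verbatim prompt; <code>entities</code> and <code>text</code> stand in for the webhook inputs introduced later:</p>
<div class="sourceCode"><pre class="sourceCode javascript code-with-copy"><code class="sourceCode javascript">// Illustrative prompt assembly; the exact wording used in the paper differs.
const entities = "PER, ORG, LOC"; // entity categories from the webhook input
const text = "..."; // the document to analyze

const prompt = [
  "You are an expert reading an early 20th-century historical text.",
  `Identify all entities of the types ${entities} in the text below.`,
  "Repeat the original text EXACTLY, but enclose every entity in tags,",
  "e.g. <<PER John Doe /PER>> for a person or <<LOC Berlin /LOC>> for a location.",
  "Do not alter, add, or omit any other text.",
  "Make sure you follow the tagging format exactly for every mention.",
  "Take a deep breath and think step by step.",
  "",
  "Text:",
  text,
].join("\n");
</code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>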
<p><strong>Why Local LLMs?</strong> The original experiments used a proprietary API (ChatGPT-4). To make the method accessible to all (and avoid data governance issues of cloud APIs), we implement it with <strong>open-source LLMs running locally</strong>. Recent openly licensed models are rapidly improving and can handle such extraction tasks given the right prompt. Running everything locally also aligns with the paper’s goal of “democratizing access” to NER for diverse, low-resource texts – there are no API costs or internet needed, and data stays on local hardware for privacy.</p>
</section>
<section id="solution-architecture" class="level2">
<h2 class="anchored" data-anchor-id="solution-architecture">Solution Architecture</h2>
<p>Our solution consists of a <strong>workflow in n8n</strong> that orchestrates the NER process, and a <strong>local Ollama server</strong> that hosts the LLM for text analysis. The high-level workflow is as follows:</p>
<ol type="1">
<li><strong>Webhook Trigger (n8n):</strong> A user initiates the process by sending an HTTP request to n8n’s webhook with two inputs: (a) a simple text defining the entity categories of interest (for example, <code>"PER, ORG, LOC"</code>), and (b) the text to analyze (either included in the request or accessible via a provided file URL). This trigger node captures the input and starts the automation.</li>
<li><strong>Prompt Construction (n8n):</strong> The workflow builds a structured prompt for the LLM. Based on the webhook input, it prepares the system instructions listing each entity type and guidelines, then appends the user’s text. Essentially, n8n will merge the <em>entity definitions</em> into a pre-defined prompt template (the one derived from the paper’s method). This can be done using a <strong>Function node</strong> or an <strong>LLM Prompt node</strong> in n8n to ensure the text and instructions are combined correctly.</li>
<li><strong>LLM Inference (Ollama + LLM):</strong> n8n then passes the prompt to an <strong>Ollama Chat Model node</strong>, which communicates with the Ollama server’s API. The Ollama daemon hosts the selected 14B model on the local GPU and returns the model’s completion. In our case, the completion will be the original text with NER tags inserted around the entities (e.g. <code><<PER John Doe /PER>> went to <<LOC Berlin /LOC>> ...</code>). This step harnesses the A100 GPU to generate results quickly, using the chosen model’s weights locally.</li>
<li><strong>Output Processing (n8n):</strong> The tagged text output from the LLM can be handled in two ways. The simplest is to <strong>return the tagged text directly</strong> as the response to the webhook call – allowing the user to see their original text with all entities highlighted by tags. Alternatively, n8n can post-process the tags to extract a structured list of entities (e.g. a JSON array of <code class="sourceCode json"><span class="fu">{</span><span class="dt">"entity"</span><span class="fu">:</span> <span class="st">"John Doe"</span><span class="fu">,</span> <span class="dt">"type"</span><span class="fu">:</span> <span class="st">"PER"</span><span class="fu">}</span></code> objects). This parsing can be done with a Regex or code node (see the sketch after this list), but given our focus on correctness, we often trust the model’s tagging format to be consistent (the paper reported the format was reliably followed when instructed clearly). Finally, an <strong>HTTP Response</strong> node sends the results back to the user (or stores them), completing the workflow.</li>
</ol>
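<p>If you opt for the structured output, the sketch below shows one way a Code node (JavaScript) could extract entities from the tagged text. It assumes the <code><<TYPE ... /TYPE>></code> format described above and that the model kept to it; malformed tags are simply skipped:</p>
<div class="sourceCode"><pre class="sourceCode javascript code-with-copy"><code class="sourceCode javascript">// Turn "<<PER John Doe /PER>> went to <<LOC Berlin /LOC>>" into
// [ { "entity": "John Doe", "type": "PER" }, { "entity": "Berlin", "type": "LOC" } ].
function extractEntities(tagged) {
  // The backreference \1 ensures the closing tag matches the opening one.
  const re = /<<([A-Z]+) ([\s\S]+?) \/\1>>/g;
  const entities = [];
  let match;
  while ((match = re.exec(tagged)) !== null) {
    entities.push({ entity: match[2], type: match[1] });
  }
  return entities;
}
</code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>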
<p><strong>Workflow Structure:</strong> In n8n’s interface, the workflow might look like a sequence of connected nodes: <strong>Webhook → Function (build prompt) → AI Model (Ollama) → Webhook Response</strong>. If using n8n’s new AI Agent feature, some steps (like prompt templating) can be configured within the AI nodes themselves. The key is that the Ollama model node is configured to use the local server (usually at <code>http://127.0.0.1:11434</code> by default) and the specific model name. We assume the base pipeline (available on GitHub) already includes most of this structure – our task is to <strong>slot in the custom prompt and model configuration</strong> for the NER use case.</p>
</section>
|
||
<section id="setup-and-infrastructure-requirements" class="level2">
|
||
<h2 class="anchored" data-anchor-id="setup-and-infrastructure-requirements">Setup and Infrastructure Requirements</h2>
|
||
<p>To reproduce this solution, you will need a machine with an <strong>NVIDIA GPU</strong> and the following software components installed:</p>
|
||
<ul>
|
||
<li><p><strong>n8n (v1.</strong>x** or later)** – the workflow automation tool. You can install n8n via npm, Docker, or use the desktop app. For a server environment, Docker is convenient. For example, to run n8n with Docker:</p>
|
||
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">docker</span> run <span class="at">-it</span> <span class="at">--rm</span> <span class="dt">\</span></span>
|
||
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a> <span class="at">-p</span> 5678:5678 <span class="dt">\</span></span>
|
||
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a> <span class="at">-v</span> ~/.n8n:/home/node/.n8n <span class="dt">\</span></span>
|
||
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a> n8nio/n8n:latest</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>This exposes n8n on <code>http://localhost:5678</code> for the web interface. (If you use Docker and plan to connect to a host-running Ollama, start the container with <code>--network=host</code> to allow access to the Ollama API on localhost.)</p></li>
|
||
<li><p><strong>Ollama (v0.x*)</strong> – an LLM runtime that serves models via an HTTP API. Installing Ollama is straightforward: download the installer for your OS from the official site (Linux users can run the one-line script <code>curl -sSL https://ollama.com/install.sh | sh</code>). After installation, start the Ollama server (daemon) by running:</p>
|
||
<div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">ollama</span> serve</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||
<p>This will launch the service listening on port 11434. You can verify it’s running by opening <code>http://localhost:11434</code> in a browser – it should respond with “Ollama is running”. <em>Note:</em> Ensure your system has recent NVIDIA drivers and CUDA support if using GPU. Ollama supports NVIDIA GPUs with compute capability ≥5.0 (the A100 is well above this). Use <code>nvidia-smi</code> to confirm your GPU is recognized. If everything is set up, Ollama will automatically use the GPU for model inference (falling back to CPU if none available).</p></li>
|
||
<li><p><strong>LLM Model (14B class):</strong> Finally, download at least one large language model to use for NER. You have a few options here, and you can “pull” them via Ollama’s CLI:</p>
|
||
<ul>
|
||
<li><p><em>DeepSeek-R1 14B:</em> A 14.8B-parameter model distilled from larger reasoning models (based on Qwen architecture). It’s optimized for reasoning tasks and compares to OpenAI’s models in quality. Pull it with:</p>
|
||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">ollama</span> pull deepseek-r1:14b</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>This downloads ~9 GB of data (the quantized weights). If you have a very strong GPU (e.g. A100 80GB), you could even try <code>deepseek-r1:70b</code> (~43 GB), but 14B is a good balance for our use-case. DeepSeek-R1 is licensed MIT and designed to run locally with no restrictions.</p></li>
<li><p><em>Cogito 14B:</em> A 14B “hybrid reasoning” model by Deep Cogito, known for excellent instruction-following and multilingual capability. Pull it with:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="ex">ollama</span> pull cogito:14b</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Cogito-14B is also ~9 GB (quantized) and supports an extended context window up to <strong>128k tokens</strong> – which is extremely useful if you plan to analyze very long documents without chunking. It’s trained in 30+ languages and tuned to follow complex instructions, which can help in structured output tasks like ours.</p></li>
<li><p><em>Others:</em> Ollama offers many models (LLaMA 2 variants, Mistral, etc.). For instance, <code>ollama pull llama2:13b</code> would get a LLaMA-2 13B model. These can work, but for best results in NER with no fine-tuning, we suggest using one of the above well-instructed models. If your hardware is limited, you could try a 7-8B model (e.g., <code>deepseek-r1:7b</code> or <code>cogito:8b</code>), which download faster and use ~4–5 GB VRAM, at the cost of some accuracy. In CPU-only scenarios, even a 1.5B model is available – it will run very slowly and likely miss more entities, but it proves the pipeline can work on minimal hardware.</p></li>
</ul></li>
</ul>
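<p>Before wiring anything into n8n, it is worth running a quick sanity check that all of the above is in place. The commands below assume the default port and one of the models named above; adjust as needed:</p>
<pre class="sourceCode bash"><code># Is the Ollama daemon reachable?
curl http://localhost:11434/api/version

# Which models have been pulled locally?
ollama list

# One-off smoke test straight from the CLI (no n8n involved)
ollama run cogito:14b "Reply with OK if you can read this."</code></pre>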
<p><strong>Hardware Requirements:</strong> Our case assumes an NVIDIA A100 GPU (40 GB), which comfortably hosts a 14B model in memory and accelerates inference. In practice, any modern GPU with ≥10 GB memory can run a 13–14B model in 4-bit quantization. For example, an RTX 3090 or 4090 (24 GB) could handle it, and even smaller GPUs (or Apple Silicon with 16+ GB RAM) can run 7B models. Ensure you have sufficient <strong>system RAM</strong> as well (at least as much as the model size, plus overhead for n8n – 16 GB RAM is a safe minimum for 14B). Disk space of ~10 GB per model is needed. If using Docker for n8n, allocate CPU and memory generously to avoid bottlenecks when the LLM node processes large text.</p>
</section>
<section id="building-the-n8n-workflow" class="level2">
<h2 class="anchored" data-anchor-id="building-the-n8n-workflow">Building the n8n Workflow</h2>
<p>With the environment ready, we now construct the n8n workflow that ties everything together. We outline each component with instructions:</p>
<section id="webhook-input-for-entities-and-text" class="level3">
<h3 class="anchored" data-anchor-id="webhook-input-for-entities-and-text">1. Webhook Input for Entities and Text</h3>
<p>Start by creating a <strong>Webhook trigger</strong> node in n8n. This will provide a URL (endpoint) that you can send a request to. Configure it to accept a POST request containing the necessary inputs. For example, we expect the request JSON to look like:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"entities"</span><span class="fu">:</span> <span class="st">"PER, ORG, LOC"</span><span class="fu">,</span></span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"text"</span><span class="fu">:</span> <span class="st">"John Doe visited Berlin in 1921 and met with the Board of Acme Corp."</span></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Here, <code>"entities"</code> is a simple comma-separated string of entity types (you could also accept an array or a more detailed schema; for simplicity we use the format used in the paper: PER for person, LOC for location, ORG for organization). The <code>"text"</code> field contains the content to analyze. In a real scenario, the text could be much longer or might be sent as a file. If it’s a file, one approach is to send it as form-data and use n8n’s <strong>Read Binary File</strong> + <strong>Move Binary Data</strong> nodes to get it into text form. Alternatively, send a URL in the JSON and use an HTTP Request node in the workflow to fetch the content. The key is that by the end of this step, we have the raw text and the list of entity labels available in the n8n workflow as variables.</p>
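<p>Once the Webhook node is active, you can exercise the trigger with a plain <code>curl</code> call. The path segment below (<code>/webhook/ner</code>) is a placeholder – use whatever path your Webhook node actually shows:</p>
<pre class="sourceCode bash"><code># POST entity labels and text to the n8n webhook (placeholder path)
curl -X POST http://localhost:5678/webhook/ner \
  -H "Content-Type: application/json" \
  -d '{
    "entities": "PER, ORG, LOC",
    "text": "John Doe visited Berlin in 1921 and met with the Board of Acme Corp."
  }'</code></pre>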
</section>
<section id="constructing-the-llm-prompt" class="level3">
<h3 class="anchored" data-anchor-id="constructing-the-llm-prompt">2. Constructing the LLM Prompt</h3>
<p>Next, add a node to build the prompt that will be fed to the LLM. You can use a <strong>Function</strong> node (JavaScript code) or the <strong>“Set” node</strong> to template a prompt string. We will create two pieces of prompt content: a <strong>system instruction</strong> (the role played by the system prompt in chat models) and the <strong>user message</strong> (which will contain the text to be processed).</p>
<p>According to the method, our <strong>system prompt</strong> should incorporate the following:</p>
<ul>
<li><strong>Persona/Context:</strong> e.g. <em>“You are a historian and archivist analyzing a historical document. The language may be old or have archaic spellings. You have extensive knowledge of people, places, and organizations relevant to the context.”</em> This establishes domain expertise in the model.</li>
<li><strong>Task Definition:</strong> e.g. <em>“Your task is to perform Named Entity Recognition. Identify all occurrences of the specified entity types in the given text and annotate them with the corresponding tags.”</em></li>
<li><strong>Entity Definitions:</strong> List the entity categories provided by the user, with a brief definition if needed. For example: <em>“The entity types are: PER (persons or fictional characters), ORG (organizations, companies, institutions), LOC (locations such as cities, countries, landmarks).”</em> If the user already provided definitions in the webhook, include those; otherwise a generic definition as shown is fine.</li>
<li><strong>Tagging Instructions:</strong> Clearly explain the tagging format. We adopt the format from the paper: each entity should be wrapped in <code><<TYPE ... /TYPE>></code>. So instruct: <em>“Enclose each entity in double angle brackets with its type label. For example: <<PER John Doe /PER>> for a person named John Doe. Do not alter any other text – only insert tags. Ensure every opening tag has a closing tag.”</em> Also mention that tags can nest or overlap if necessary (though that’s rare).</li>
<li><strong>Output Expectations:</strong> Emphasize that the output should be the <strong>exact original text, verbatim, with tags added</strong> and nothing else. For example: <em>“Repeat the input text exactly, adding the tags around the entities. Do not add explanations or remove any content. The output should look like the original text with markup.”</em> This is crucial to prevent the model from omitting or rephrasing text. The paper’s prompt literally had a line: “Repeat the given text exactly. Be very careful to ensure that nothing is added or removed apart from the annotations.”</li>
<li><strong>Compliance & Thoughtfulness:</strong> We can borrow the trick of telling the model to take its time and be precise. For instance: <em>“Before answering, take a deep breath and think step by step. Make sure you find <strong>all</strong> entities. You will be rewarded for each correct tag.”</em> While the notion of reward is hypothetical, such phrasing has been observed to sharpen the model’s focus. This is optional but can be useful for complex texts.</li>
</ul>
<p>Once this system prompt is assembled as a single string, it will be sent as the system role content to the LLM. Now, for the <strong>user prompt</strong>, we simply supply the text to be analyzed. In many chat-based LLMs, the user message would contain the text on which the assistant should perform the task. We might prefix it with something like “Text to analyze:” for clarity, or just include the raw text. (Including a prefix is slightly safer to distinguish it from any instructions, but since the system prompt already set the task, the user message can be just the document text.)</p>
<p>In n8n, if using the <strong>Basic LLM Chain</strong> node, you can configure it to use a custom system prompt. For example, connect the Function/Set node output into the LLM node, and in the LLM node’s settings choose “Mode: Complete” or similar, then under <strong>System Instructions</strong> put an expression that references the constructed prompt text (e.g., <code>{{ $json["prompt"] }}</code> if the prompt was output to that field). The <strong>User Message</strong> can similarly be fed from the input text field (e.g., <code>{{ $json["text"] }}</code>). Essentially, we map our crafted instruction into the system role, and the actual content into the user role.</p>
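<p>For reference, a minimal Function-node sketch that assembles these two fields is shown below. The field names (<code>entities</code>, <code>text</code>, <code>prompt</code>) match the webhook payload above; the instruction wording is illustrative and should be adapted to your domain:</p>
<pre class="sourceCode javascript"><code>// n8n Function node: build the system prompt from the webhook input.
// Assumes each incoming item carries "entities" and "text" as above.
const { entities, text } = items[0].json;

const systemPrompt = [
  "You are a historian and archivist analyzing a historical document.",
  "Your task is to perform Named Entity Recognition.",
  `The entity types are: ${entities}.`,
  "Enclose each entity in double angle brackets with its type label,",
  "e.g. <<PER John Doe /PER>>. Do not alter any other text - only insert tags.",
  "Repeat the given text exactly; nothing may be added or removed",
  "apart from the annotations.",
].join(" ");

// Pass both fields on for the LLM node to reference.
return [{ json: { prompt: systemPrompt, text } }];</code></pre>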
</section>
<section id="configuring-the-local-llm-ollama-model-node" class="level3">
<h3 class="anchored" data-anchor-id="configuring-the-local-llm-ollama-model-node">3. Configuring the Local LLM (Ollama Model Node)</h3>
<p>Now configure the LLM node to use the <strong>Ollama</strong> backend and your downloaded model. n8n provides an “Ollama Chat Model” integration, which is a sub-node of the AI Agent system. In the n8n editor, add or open the LLM node (if using the AI Agent, this might be inside a larger agent node), and look for model selection. Select <strong>Ollama</strong> as the provider. You’ll need to set up a credential for Ollama API access – use <code>http://127.0.0.1:11434</code> as the host (instead of the default localhost, to avoid any IPv6 binding issues). No API key is needed since it’s local. Once connected, you should see a dropdown of available models (all the ones you pulled). Choose the 14B model you downloaded, e.g. <code>deepseek-r1:14b</code> or <code>cogito:14b</code>.</p>
<p>Double-check the <strong>parameters</strong> for generation. By default, Ollama models have their own preset for max tokens and temperature. For an extraction task, we want the model to stay <strong>focused and deterministic</strong>. It’s wise to set a relatively low temperature (e.g. 0.2) to reduce randomness, and a high max tokens so it can output the entire text with tags (set max tokens to at least the length of your input in tokens plus 10-20% for tags). If using Cogito with its 128k context, you can safely feed very long text; with other models (often ~4k context), ensure your text isn’t longer than the model’s context limit or use a model variant with extended context. If the model supports <strong>“tools” or functions</strong>, you won’t need those here – this is a single-shot prompt, not a multi-step agent requiring tool usage, so just the chat completion mode is sufficient.</p>
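<p>You can verify these generation settings outside of n8n by calling the Ollama chat endpoint directly. The request below is only a smoke test – the model name and option values are the ones used in this guide:</p>
<pre class="sourceCode bash"><code># Direct test of Ollama's chat API with deterministic settings
curl http://localhost:11434/api/chat -d '{
  "model": "cogito:14b",
  "stream": false,
  "options": { "temperature": 0.2, "num_predict": 2048 },
  "messages": [
    { "role": "system", "content": "Tag every person as <<PER ... /PER>>. Repeat the text exactly." },
    { "role": "user", "content": "John Doe visited Berlin in 1921." }
  ]
}'</code></pre>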
<p>At this point, when the workflow runs to this node, n8n will send the system and user messages to Ollama and wait for the response. The heavy lifting is done by the LLM on the GPU, which will generate the tagged text. On an A100, a 14B model can process a few thousand tokens of input and output in just a handful of seconds (exact time depends on the model and input size).</p>
</section>
<section id="returning-the-results" class="level3">
<h3 class="anchored" data-anchor-id="returning-the-results">4. Returning the Results</h3>
<p>After the LLM node, add a node to handle the output. If you want to present the <strong>tagged text</strong> directly, you can pass the LLM’s output to the final Webhook Response node (or if using the built-in n8n chat UI, you would see the answer in the chat). The tagged text will look something like:</p>
<pre class="plain"><code><<PER John Doe /PER>> visited <<LOC Berlin /LOC>> in 1921 and met with the Board
of <<ORG Acme Corp /ORG>>.</code></pre>
<p>This format highlights each identified entity. It is immediately human-readable with the tags, and trivial to post-process if needed. For example, one could use a regex like <code><<(\w+) (.*?) /\1>></code> to extract all <code>type</code> and <code>entity</code> pairs from the text. In n8n, a quick approach is to use a <strong>Function</strong> node to find all matches of that pattern in <code>item.json["data"]</code> (assuming the LLM output is in <code>data</code>). Then one could return a JSON array of entities. However, since our focus is on correctness and ease, you might simply return the marked-up text and perhaps document how to parse it externally if the user wants structured data.</p>
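<p>A minimal sketch of that post-processing step in a Function node could look as follows (assuming the tagged text arrived in the <code>data</code> field – adjust the field name to your LLM node’s actual output):</p>
<pre class="sourceCode javascript"><code>// n8n Function node: extract (type, entity) pairs from the tagged text.
const tagged = items[0].json.data;
const pattern = /<<(\w+) (.*?) \/\1>>/g; // matches <<TYPE entity /TYPE>>
const entities = [];
let match;
while ((match = pattern.exec(tagged)) !== null) {
  entities.push({ type: match[1], text: match[2] });
}
return [{ json: { entities } }];</code></pre>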
<p>Finally, use an <strong>HTTP Response</strong> node (if the workflow was triggered by a Webhook) to send back the results. If the workflow was triggered via n8n’s chat trigger (in the case of interactive usage), you would instead rely on the chat UI output. For a pure API workflow, the HTTP response will contain either the tagged text or a JSON of extracted entities, which the user’s script or application can then use.</p>
<p><strong>Note:</strong> If you plan to run multiple analyses or have an ongoing service, you might want to <strong>persist the Ollama server</strong> (don’t shut it down between runs) and perhaps keep the model loaded in VRAM for performance. Ollama will cache the model in memory after the first request, so subsequent requests are faster. On an A100, you could even load two models (if you plan to experiment with which gives better results) but be mindful of VRAM usage if doing so concurrently.</p>
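<p>Ollama’s <code>keep_alive</code> parameter controls how long a model stays resident after a request. Assuming default settings, a request like the following preloads the model and keeps it in memory indefinitely (a value of <code>0</code> unloads it again):</p>
<pre class="sourceCode bash"><code># Preload the model and keep it resident (-1 = keep indefinitely)
curl http://localhost:11434/api/generate -d '{ "model": "cogito:14b", "keep_alive": -1 }'</code></pre>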
</section>
</section>
<section id="model-selection-considerations" class="level2">
<h2 class="anchored" data-anchor-id="model-selection-considerations">Model Selection Considerations</h2>
<p>We provided two example 14B models (DeepSeek-R1 and Cogito) to use with this pipeline. Both are good choices, but here are some considerations and alternatives:</p>
<ul>
<li><strong>Accuracy vs. Speed:</strong> Larger models (like 14B or 30B) generally produce more accurate and coherent results, especially for complex instructions, compared to 7B models. Since our aim is correctness of NER output, the A100 allows us to use a 14B model, which offers a sweet spot between quality and speed. In preliminary tests, these models can correctly tag most obvious entities and even handle some tricky cases (e.g. person names with titles, organizations that sound like person names, etc.) thanks to their pretrained knowledge. If you find the model is making mistakes, you could try a bigger model (Cogito 32B or 70B, if resources permit). Conversely, if you need faster responses and are willing to trade some accuracy, a 7–8B model or running the 14B more aggressively quantized (e.g. 4-bit) on CPU might be acceptable for smaller texts.</li>
<li><strong>Domain of the Text:</strong> The paper dealt with historical travel guide text (1920s era). These open models have been trained on large internet corpora, so they likely have seen a lot of historical names and terms, but their coverage might not be as exhaustive as GPT-4. If your text is in a specific domain (say, ancient mythology or very obscure local history), the model might miss entities that it doesn’t recognize as famous. The prompt’s context can help (for example, adding a note like <em>“Note: Mythological characters should be considered PERSON entities.”</em> as they did for Greek gods). For extremely domain-specific needs, one could fine-tune a model or use a specialized one, but that moves beyond the zero-shot philosophy.</li>
<li><strong>Language:</strong> If your texts are not in English, ensure the chosen model is multilingual. Cogito, for instance, was trained in over 30 languages, so it can handle many European languages (the paper also tested German prompts). If using a model that’s primarily English (like some LLaMA variants), you might get better results by writing the instructions in English but letting it output tags in the original text. The study found English prompts initially gave better recall even on German text, but with prompt tweaks the gap closed. For our pipeline, you can simply provide the definitions in English and the text in the foreign language – a capable model will still tag the foreign entities. For example, Cogito or DeepSeek should tag a German sentence’s <em>“Herr Schmidt”</em> as <code><<PER Herr Schmidt /PER>></code>. Always test on a small sample if in doubt.</li>
<li><strong>Extended Context:</strong> If your input text is very long (tens of thousands of words), you should chunk it into smaller segments (e.g. paragraph by paragraph), run the model on each, and then merge the outputs (a chunking sketch is given under Limitations below). This is because most models (including DeepSeek 14B) have a context window of 2048–8192 tokens. However, Cogito’s 128k context capability is a game-changer – in theory you could feed an entire book and get a single output. Keep in mind the time and memory usage will grow with very large inputs, and n8n might need increased timeout settings for such long runs. For typical use (a few pages of text at a time), the standard context is sufficient.</li>
</ul>
<p>In our implementation, we encourage experimenting with both DeepSeek-R1 and Cogito models. Both are <strong>open-source and free for commercial use</strong> (Cogito uses an Apache 2.0 license, DeepSeek MIT). They represent some of the best 14B-class models as of early 2025. You can cite these models in any academic context if needed, or even switch to another model with minimal changes to the n8n workflow (just pull the model and change the model name in the Ollama node).</p>
</section>
<section id="example-run" class="level2">
<h2 class="anchored" data-anchor-id="example-run">Example Run</h2>
<p>Let’s run through a hypothetical example to illustrate the output. Suppose a historian supplies the following via the webhook:</p>
<ul>
<li><strong>Entities:</strong> <code>PER, ORG, LOC</code></li>
<li><strong>Text:</strong> <em>“Baron Münchhausen was born in Bodenwerder and served in the Russian military under Empress Anna. Today, the Münchhausen Museum in Bodenwerder is operated by the town council.”</em></li>
</ul>
<p>When the workflow executes, the LLM receives instructions to tag people (PER), organizations (ORG), and locations (LOC). With the prompt techniques described, the model’s output might look like:</p>
<pre class="plain"><code><<PER Baron Münchhausen /PER>> was born in <<LOC Bodenwerder /LOC>> and served
in the Russian military under <<PER Empress Anna /PER>>. Today, the <<ORG
Münchhausen Museum /ORG>> in <<LOC Bodenwerder /LOC>> is operated by the town
council.</code></pre>
<p>All person names (Baron Münchhausen, Empress Anna) are enclosed in <code><<PER>></code> tags, the museum is marked as an organization, and the town Bodenwerder is marked as a location (twice). The rest of the sentence remains unchanged. This output can be returned as-is to the user. They can visually verify it or programmatically parse out the tagged entities. The correctness of outputs is high: each tag corresponds to a real entity mention in the text, and there are no hallucinated tags. If the model were to make an error (say, tagging “Russian” as LOC erroneously), the user could adjust the prompt (for example, clarify that national adjectives are not entities) and re-run.</p>
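<p>Passed through the extraction step from section 4, the same output would reduce to a structured list along these lines:</p>
<pre class="sourceCode json"><code>{
  "entities": [
    { "type": "PER", "text": "Baron Münchhausen" },
    { "type": "LOC", "text": "Bodenwerder" },
    { "type": "PER", "text": "Empress Anna" },
    { "type": "ORG", "text": "Münchhausen Museum" },
    { "type": "LOC", "text": "Bodenwerder" }
  ]
}</code></pre>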
</section>
<section id="limitations-and-solutions" class="level2">
<h2 class="anchored" data-anchor-id="limitations-and-solutions">Limitations and Solutions</h2>
<p>While this pipeline makes NER easier to reproduce, it’s important to be aware of its limitations and how to mitigate them:</p>
<ul>
<li><p><strong>Model Misclassifications:</strong> A local 14B model may not match GPT-4’s level of understanding. It might occasionally tag something incorrectly or miss a subtle entity. For instance, in historical texts, titles or honorifics (e.g. <em>“Dr. John Smith”</em>) might confuse it, or a ship name might be tagged as ORG when it’s not in our categories. <strong>Solution:</strong> Refine the prompt with additional guidance. You can add a “Note” section in the instructions to handle known ambiguities (the paper did this with notes about Greek gods being persons, etc.). Also, a quick manual review or spot-check is recommended for important outputs. Since the output format is simple, a human or a simple script can catch obvious mistakes (e.g., if “Russian” was tagged LOC, a post-process could remove it knowing it’s likely wrong). Over time, if you notice a pattern of mistakes, update the prompt instructions accordingly.</p></li>
<li><p><strong>Text Reproduction Issues:</strong> We instruct the model to output the original text verbatim with tags, but LLMs sometimes can’t resist minor changes. They may “correct” spelling or punctuation, or alter spacing. The paper noted this tendency and used fuzzy matching when evaluating. In our pipeline, minor format changes usually don’t harm the extraction, but if preserving the text exactly is important (say, for downstream alignment), this is a concern. <strong>Solution:</strong> Emphasize fidelity in the prompt (we already do). If needed, do a diff between the original text and the tagged text and flag differences – a minimal check is sketched after this list. Usually differences will be small (e.g., changing an old spelling to a modern one). You can then either accept them or attempt a more rigid approach (like asking for a JSON list of entity offsets – though that introduces other complexities and was intentionally avoided by the authors). In practice, we found the tag-insertion approach with strong instructions yields nearly identical text apart from the tags.</p></li>
<li><p><strong>Long Inputs and Memory:</strong> Very large documents may exceed the model’s input capacity or make the process slow. The A100 GPU can handle a lot, but n8n itself might have default timeouts for a single workflow execution. <strong>Solution:</strong> For long texts, break the input into smaller chunks (maybe one chapter or section at a time). n8n can loop through chunks using the Split In Batches node or simply by splitting the text in a Function node and feeding the LLM node multiple times; a chunking sketch follows this list. You’d then concatenate the outputs. If chunking, be aware that an entity spanning a chunk boundary may be missed – this is rare when chunks break at paragraph or sentence boundaries. Alternatively, use Cogito with its extended context to avoid chunking. Make sure to increase n8n’s execution timeout if needed (via the environment variable <code>N8N_DEFAULT_TIMEOUT</code> or in the workflow settings).</p></li>
<li><p><strong>Concurrent Usage:</strong> If multiple users or processes hit the webhook simultaneously, they would be sharing the single LLM instance. Ollama can queue requests, but the GPU will handle them one at a time (unless running separate instances with multiple GPUs). For a research setting with one user at a time, this is fine. If offering this as a service to others, consider queuing requests or scaling out (multiple replicas of this workflow on different GPU machines). The stateless design of the prompt makes each run independent.</p></li>
<li><p><strong>n8n Learning Curve:</strong> For historians new to n8n, setting up the workflow might be unfamiliar. However, n8n’s no-code interface is fairly intuitive with a bit of guidance. This case study provides the logic; one can also import pre-built workflows. In fact, the <em>n8n</em> community has template workflows (for example, a template for chatting with local LLMs) that could be adapted. We assume the base pipeline from the paper’s authors is available on GitHub – using that as a starting point, one mostly needs to adjust nodes as described. If needed, one can refer to n8n’s official docs or community forum for help on creating a webhook or using function nodes. Once set up, running the workflow is as easy as sending an HTTP request or clicking “Execute Workflow” in n8n.</p></li>
<li><p><strong>Output Verification:</strong> Since we prioritize correctness, you may want to evaluate how well the model did, especially if you have ground truth annotations. While benchmarking is out of scope here, note that you can integrate evaluation into the pipeline too. For instance, if you had a small test set with known entities, you could compare the model output tags with expected tags using a Python script (n8n has an Execute Python node) or use an NER evaluation library like <em>nervaluate</em> for precision/recall. This is exactly what the authors did to report performance, and you could mimic that to gauge your chosen model’s accuracy.</p></li>
</ul>
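<p>Two of the mitigations above lend themselves to small Function-node sketches. First, the fidelity check from the <em>Text Reproduction Issues</em> item: strip the tags back out and compare against the original input (field names are illustrative):</p>
<pre class="sourceCode javascript"><code>// Remove the annotations and compare against the original input text.
const original = items[0].json.text; // what was sent to the model
const tagged = items[0].json.data;   // what the model returned
const stripped = tagged.replace(/<<\w+ /g, "").replace(/ \/\w+>>/g, "");
return [{ json: { identical: stripped === original } }];</code></pre>
<p>Second, a paragraph-aligned chunking step from the <em>Long Inputs and Memory</em> item, emitting one n8n item per chunk so a Split In Batches node (or repeated LLM-node calls) can process them in sequence. The character budget is a rough stand-in for a token limit (~4 characters per token):</p>
<pre class="sourceCode javascript"><code>// Split long text into paragraph-aligned chunks of roughly 2000 tokens.
const text = items[0].json.text;
const maxChars = 8000; // ≈ 2000 tokens at ~4 chars/token (rough heuristic)
const chunks = [];
let current = "";
for (const para of text.split(/\n\s*\n/)) {
  if (current && current.length + para.length > maxChars) {
    chunks.push(current);
    current = para;
  } else {
    current = current ? current + "\n\n" + para : para;
  }
}
if (current) chunks.push(current);
// One workflow item per chunk; tagged outputs are concatenated downstream.
return chunks.map((chunk) => ({ json: { text: chunk } }));</code></pre>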
</section>
<section id="conclusion" class="level2">
<h2 class="anchored" data-anchor-id="conclusion">Conclusion</h2>
<p>By following this guide, we implemented the <strong>NER4All</strong> paper’s methodology with a local, reproducible setup. We used n8n to handle automation and prompt assembly, and a local LLM (via Ollama) to perform the heavy-duty language understanding. The result is a flexible NER pipeline that requires <strong>no training data or API access</strong> – just a well-crafted prompt and a powerful pretrained model. We demonstrated how a user can specify custom entity types and get their text annotated in one click or API call. The approach leverages the strengths of LLMs (vast knowledge and language proficiency) to adapt to historical or niche texts, aligning with the paper’s finding that a bit of context and expert prompt design can unlock high NER performance.</p>
<p>Importantly, this setup is <strong>easy to reproduce</strong>: all components are either open-source or freely available (n8n, Ollama, and the models). A research engineer or historian can run it on a single machine with sufficient resources, and it can be shared as a workflow file for others to import. By removing the need for extensive data preparation or model training, this lowers the barrier to extracting structured information from large text archives.</p>
<p>Moving forward, users can extend this case study in various ways: adding more entity types (just update the definitions input), switching to other LLMs as they become available (perhaps a future 20B model with even better understanding), or integrating the output with databases or search indexes for further analysis. With the rapid advancements in local AI models, we anticipate that such pipelines will become even more accurate and faster over time, continually democratizing access to advanced NLP for all domains.</p>
<p><strong>Sources:</strong> This implementation draws on insights from Ahmed et al. (2025) for the prompt-based NER method, and uses tools like n8n and Ollama as documented in their official guides. The chosen models (DeepSeek-R1 and Cogito) are described in their respective releases. All software and models are utilized in accordance with their licenses for a fully local deployment.</p>
</section>
<div id="quarto-appendix" class="default"><section id="methodik-llms-als-autoren" class="level2 appendix"><h2 class="anchored quarto-appendix-heading">Methodology / LLMs as ‘Authors’</h2><div class="quarto-appendix-contents">
<p>The initial draft was created via web search and “deep research” with <code>gpt-4.5 (preview)</code>. Final proofreading, content review, and layout by Nicole Dresselhaus.</p>
</div></section><section class="quarto-appendix-contents" role="doc-bibliography" id="quarto-bibliography"><h2 class="anchored quarto-appendix-heading">References</h2><div id="refs" class="references csl-bib-body" data-entry-spacing="0" role="list">
|
||
<div id="ref-ollama_chroma_cookbook" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">1. </div><div class="csl-right-inline"><a href="https://cookbook.chromadb.dev/integrations/ollama/embeddings/">Ollama - chroma cookbook</a>. 2024.</div>
|
||
</div>
|
||
<div id="ref-smart_connections_plugin" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">2. </div><div class="csl-right-inline"><a href="https://www.reddit.com/r/ObsidianMD/comments/1fzmkdk/just_wanted_to_mention_that_the_smart_connections/">Just wanted to mention that the smart connections plugin is incredible. : R/ObsidianMD</a>. 2024.</div>
|
||
</div>
|
||
<div id="ref-khoj_plugin" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">3. </div><div class="csl-right-inline"><a href="https://forum.obsidian.md/t/khoj-an-ai-powered-search-assistant-for-you-second-brain/53756">Khoj: An AI powered search assistant for your second brain - share & showcase - obsidian forum</a>. 2023.</div>
|
||
</div>
|
||
<div id="ref-supercharging_obsidian_search" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">4. </div><div class="csl-right-inline">@airabbitX. 2024. <a href="https://medium.com/@airabbitX/supercharging-obsidian-search-with-local-llms-a-personal-journey-1e008eb649a6">Supercharging obsidian search with AI and ollama</a>.</div>
|
||
</div>
|
||
<div id="ref-export_to_common_graph_formats" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">5. </div><div class="csl-right-inline"><a href="https://forum.obsidian.md/t/export-to-common-graph-formats/4138">Export to common graph formats - plugins ideas - obsidian forum</a>. 2020.</div>
|
||
</div>
|
||
<div id="ref-personal_knowledge_graphs_in_obsidian" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">6. </div><div class="csl-right-inline">Pavlyshyn, Volodymyr. 2024. <a href="https://volodymyrpavlyshyn.medium.com/personal-knowledge-graphs-in-obsidian-528a0f4584b9">Personal knowledge graphs in obsidian</a>.</div>
|
||
</div>
|
||
<div id="ref-export_obsidian_to_rdf" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">7. </div><div class="csl-right-inline">Pavlyshyn, Volodymyr. 2024. <a href="https://volodymyrpavlyshyn.medium.com/how-to-export-your-obsidian-vault-to-rdf-00fb2539ed18">How to export your obsidian vault to RDF</a>.</div>
|
||
</div>
|
||
<div id="ref-ai_empowered_zettelkasten_with_ner_and_graph_llm" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">8. </div><div class="csl-right-inline"><a href="https://forum.obsidian.md/t/ai-empowered-zettelkasten-with-ner-and-graph-llm/79112">AI empowered zettelkasten with NER and graph LLM - knowledge management - obsidian forum</a>. 2024.</div>
|
||
</div>
|
||
<div id="ref-build_your_second_brain_with_khoj_ai" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">9. </div><div class="csl-right-inline"><a href="https://dswharshit.medium.com/build-your-second-brain-with-khoj-ai-high-signal-ai-2-87492730d7ce">Build your second brain with khoj AI</a>. 2024.</div>
|
||
</div>
|
||
<div id="ref-second_brain_assistant_with_obsidian" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">10. </div><div class="csl-right-inline"><a href="https://www.ssp.sh/brain/second-brain-assistant-with-obsidian-notegpt/">Second brain assistant with obsidian</a>. 2025.</div>
|
||
</div>
|
||
<div id="ref-basic_memory_ai_conversations_that_build_knowledge" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">11. </div><div class="csl-right-inline"><a href="https://basicmachines.co/">Basic memory | AI conversations that build knowledge</a>.</div>
|
||
</div>
|
||
<div id="ref-local_free_rag_with_question_generation" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">12. </div><div class="csl-right-inline">Galvis, Oscar. 2024. <a href="https://lomaky.medium.com/local-free-rag-with-question-generation-using-lm-studio-nomic-embeddings-chromadb-and-llama-3-2-9758877e93b4">Local (free) RAG with question generation using LM studio, nomic embeddings, ChromaDB and llama 3.2 on a mac mini M1</a>.</div>
|
||
</div>
|
||
<div id="ref-private_gpt_llama_cpp_based_scripts" class="csl-entry" role="listitem">
|
||
<div class="csl-left-margin">13. </div><div class="csl-right-inline"><a href="https://www.ssp.sh/brain/second-brain-assistant-with-obsidian-notegpt/">privateGPT / llama.cpp based scripts</a>. 2025.</div>
|
||
</div>
|
||
</div></section><section class="quarto-appendix-contents" id="quarto-citation"><h2 class="anchored quarto-appendix-heading">Citation</h2><div><div class="quarto-appendix-secondary-label">BibTeX citation:</div><pre class="sourceCode code-with-copy quarto-appendix-bibtex"><code class="sourceCode bibtex">@online{2025,
|
||
author = {, GPT-4.5 and , cogito-v1-preview and Dresselhaus, Nicole},
|
||
title = {Case {Study:} {Local} {LLM-Based} {NER} with N8n and
|
||
{Ollama}},
|
||
date = {2025-05-05},
|
||
url = {https://nicole.dresselhaus.cloud/Writing/ner4all-case-study.html},
|
||
langid = {en},
|
||
abstract = {Named Entity Recognition (NER) is a foundational task in
|
||
text analysis, traditionally addressed by training NLP models on
|
||
annotated data. However, a recent study – \_“NER4All or Context is
|
||
All You Need”\_ – showed that out-of-the-box Large Language Models
|
||
(LLMs) can **significantly outperform** classical NER pipelines
|
||
(e.g. spaCy, Flair) on historical texts by using clever prompting,
|
||
without any model retraining. This case study demonstrates how to
|
||
implement the paper’s method using entirely local infrastructure: an
|
||
**n8n** automation workflow (for orchestration) and a **Ollama**
|
||
server running a 14B-parameter LLM on an NVIDIA A100 GPU. The goal
|
||
is to enable research engineers and tech-savvy historians to
|
||
**reproduce and apply this method easily** on their own data, with a
|
||
focus on usability and correct outputs rather than raw performance.
|
||
We will walk through the end-to-end solution – from accepting a
|
||
webhook input that defines entity types (e.g. Person, Organization,
|
||
Location) to prompting a local LLM to extract those entities from a
|
||
text. The solution covers setup instructions, required
|
||
infrastructure (GPU, memory, software), model configuration, and
|
||
workflow design in n8n. We also discuss potential limitations (like
|
||
model accuracy and context length) and how to address them. By the
|
||
end, you will have a clear blueprint for a **self-hosted NER
|
||
pipeline** that leverages the knowledge encoded in LLMs (as
|
||
advocated by the paper) while maintaining data privacy and
|
||
reproducibility.}
|
||
}
|
||
</code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre><div class="quarto-appendix-secondary-label">For attribution, please cite this work as:</div><div id="ref-2025" class="csl-entry quarto-appendix-citeas" role="listitem">
|
||
GPT-4.5, cogito-v1-preview, and Nicole Dresselhaus. 2025. <span>“Case
|
||
Study: Local LLM-Based NER with N8n and Ollama.”</span> May 5, 2025. <a href="https://nicole.dresselhaus.cloud/Writing/ner4all-case-study.html">https://nicole.dresselhaus.cloud/Writing/ner4all-case-study.html</a>.
|
||
</div></div></section></div></main> <!-- /main -->
</div> <!-- /content -->
</body></html> |