diff --git a/nodes/theoryofcomputing.jsonld b/nodes/theoryofcomputing.jsonld
new file mode 100644
index 0000000..6161d7f
--- /dev/null
+++ b/nodes/theoryofcomputing.jsonld
@@ -0,0 +1,11 @@
+{
+    "@context": {
+        "library": "dokk:vocab:library:"
+    },
+    "@type": [
+        "library:Journal"
+    ],
+    "@id": "dokk:theoryofcomputing",
+    "library:title": "Theory of Computing",
+    "library:website": "https://theoryofcomputing.org"
+}
diff --git a/scripts/theoryofcomputing.org/.gitignore b/scripts/theoryofcomputing.org/.gitignore
new file mode 100644
index 0000000..b950c4c
--- /dev/null
+++ b/scripts/theoryofcomputing.org/.gitignore
@@ -0,0 +1,4 @@
+/nodes
+/pdf
+/theoryofcomputing.org
+/venv
diff --git a/scripts/theoryofcomputing.org/README b/scripts/theoryofcomputing.org/README
new file mode 100644
index 0000000..ababd72
--- /dev/null
+++ b/scripts/theoryofcomputing.org/README
@@ -0,0 +1,27 @@
+Articles are grouped by volume, and they are indexed at https://theoryofcomputing.org/articles/main/
+Each article page has a link to a source.zip file containing all the info about the specific
+article. The idea is to download all these zip files and extract info from them.
+
+The website has instructions for mirroring with rsync, but they are outdated. Therefore
+we scrape the website using wget instead.
+
+Parsing LaTeX from Python is a nightmare (there is no suitable module, and not all papers
+use the same LaTeX snippets), therefore some data is extracted from the articles' HTML pages
+(they use Google Scholar citation_* meta tags).
+
+
+Mirror the whole website:
+
+    wget --mirror https://theoryofcomputing.org
+
+
+Decompress all "source.zip" archives into "source.zip.decompressed":
+
+    find -type f -name "source.zip" -exec unzip -d "{}.decompressed" "{}" \;
+
+
+Extract data from the mirror and create the nodes:
+
+    mkdir --parents pdf/theoryofcomputing.org
+    mkdir nodes
+    find -type d -regex ".*/articles/v[0-9][0-9][0-9]a[0-9][0-9][0-9]$" -exec ./toc.py {} \;
diff --git a/scripts/theoryofcomputing.org/requirements.txt b/scripts/theoryofcomputing.org/requirements.txt
new file mode 100644
index 0000000..f6c88ce
--- /dev/null
+++ b/scripts/theoryofcomputing.org/requirements.txt
@@ -0,0 +1,6 @@
+lxml
+
+# https://github.com/sciunto-org/python-bibtexparser
+--pre
+bibtexparser
+
diff --git a/scripts/theoryofcomputing.org/toc.py b/scripts/theoryofcomputing.org/toc.py
new file mode 100755
index 0000000..e793b0d
--- /dev/null
+++ b/scripts/theoryofcomputing.org/toc.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+
+# This script expects an article folder as input, for example:
+# ./theoryofcomputing.org/articles/v001a001
+
+import bibtexparser
+import json
+import os
+import re
+import subprocess
+import sys
+from datetime import datetime
+from lxml import html
+
+article_path = sys.argv[1]
+assert os.path.isdir(article_path)
+
+bibtex = None
+try:
+    bibtex = bibtexparser.parse_file(f'{article_path}/bibtex.txt')
+except Exception:
+    # Some articles do not have a bibtex file. They only contain
+    # "forewords" for special issues.
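+    # There is nothing to index in that case, so skip this folder.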
+    sys.exit()
+
+bibtex_key = bibtex.entries[0].key
+assert re.match('^v[0-9]{3}a[0-9]{3}$', bibtex_key)
+
+# Extract data from the bibtex entry
+title = bibtex.entries[0].fields_dict['title'].value
+authors = bibtex.entries[0].fields_dict['author'].value
+doi = bibtex.entries[0].fields_dict['doi'].value
+site = bibtex.entries[0].fields_dict['URL'].value
+license = None
+
+# In bibtex, authors are written as "surname, name [and ...]",
+# so split the string and reorder the parts to get "name surname"
+authors = [
+    ' '.join(part.strip() for part in author.split(',')[::-1])
+    for author in authors.split(' and ')
+]
+
+# Extract additional data from the HTML page using XPath
+html_tree = html.parse(f'{article_path}/index.html')
+
+license_node = html_tree.findall('.//*[@id="copyright"]//a[@rel="license"]')
+assert len(license_node) == 1
+license_url = license_node[0].get('href')
+
+if license_url == 'http://creativecommons.org/licenses/by/3.0/':
+    license = 'dokk:license:CC-BY-3.0'
+elif license_url == 'http://creativecommons.org/licenses/by-nd/2.0/':
+    license = 'dokk:license:CC-BY-ND-2.0'
+assert license
+
+pdf = html_tree.findall('.//meta[@name="citation_pdf_url"]')
+assert len(pdf) == 1
+pdf_url = pdf[0].get('content')
+assert pdf_url == f'https://theoryofcomputing.org/articles/{bibtex_key}/{bibtex_key}.pdf'
+
+# Copy the PDF file of the article to the output folder.
+# Stripping the "https://" prefix from the URL gives the file's path
+# inside the local wget mirror.
+pdf_source_file = pdf_url[8:]
+assert os.path.isfile(pdf_source_file)
+cp_ret = subprocess.run(['cp', pdf_source_file, './pdf/theoryofcomputing.org/'])
+assert cp_ret.returncode == 0  # No errors
+
+# Create the node
+node = {
+    '@context': {
+        'library': 'dokk:vocab:library:',
+        'license': 'dokk:vocab:license:',
+        'library:journal': { '@type': '@id' },
+        'license:licensed_under': { '@type': '@id' },
+        'blob': 'dokk:vocab:blob:'
+    },
+    '@type': [
+        'library:Item',
+        'library:JournalArticle'
+    ],
+    '@id': f'dokk:theoryofcomputing_{bibtex_key}',
+    'library:author': authors,
+    'library:journal': 'dokk:theoryofcomputing',
+    'license:licensed_under': license,
+    'library:title': title,
+    'blob:at': {
+        '@id': f'file:/pdf/theoryofcomputing.org/{bibtex_key}.pdf',
+        'blob:primary_source': pdf_url,
+        'blob:retrieval_date': datetime.now().strftime('%Y-%m-%d')
+    }
+}
+
+# Save the node to a file
+with open(f'nodes/theoryofcomputing_{bibtex_key}.jsonld', 'w') as file:
+    json.dump(node, file, indent=4, ensure_ascii=False)
+
+print(f'[done] {bibtex_key}')
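
A note on the author parsing in toc.py above: a minimal sketch of what the
list comprehension does, using hypothetical placeholder names (not taken from
a real article):

    # bibtex author fields look like "surname, name and surname, name"
    field = 'Doe, Jane and Roe, Richard'
    authors = [
        ' '.join(part.strip() for part in author.split(',')[::-1])
        for author in field.split(' and ')
    ]
    print(authors)  # ['Jane Doe', 'Richard Roe']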