import fs from 'fs/promises'
import path from 'path'
import { XMLParser } from 'fast-xml-parser'

// Directory of this script; import.meta.dirname requires Node.js >= 20.11
const basePath = import.meta.dirname

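// Wrap fetch() so the response body is passed through a stream that logs
// download progress roughly once per second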
const fetchProgress = async (input, init) => {
  const response = await fetch(input, init)
  // fetch() resolves on HTTP errors, so turn them into exceptions for the
  // retry loops below
  if (!response.ok) {
    throw new Error('HTTP ' + response.status + ' for ' + input)
  }
  let loaded = 0
  const total = response.headers.get('content-length')
  let prevPrintTime = 0
  const units = ['B', 'KiB', 'MiB', 'GiB']

  return new Response(
    new ReadableStream({
      // Drain the upstream body once, re-emitting every chunk; start() (rather
      // than pull()) runs exactly once
      start: async controller => {
        for await (const chunk of response.body) {
          controller.enqueue(chunk)
          loaded += chunk.byteLength
          const now = Date.now()
          if (now - prevPrintTime > 1000) {
            prevPrintTime = now
            if (total) {
              console.info(loaded + ' / ' + total + ' bytes')
            } else {
              // Without a content-length header, print a human-readable size
              let uidx = 0
              let val = loaded
              while (val >= 1024 && uidx + 1 < units.length) {
                val /= 1024
                uidx++
              }
              console.info(Math.floor(val * 100) / 100 + ' ' + units[uidx])
            }
          }
        }
        controller.close()
      },
    })
  )
}

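// Download the Eurostat table-of-contents XML, convert it to a JSON tree and
// write it as catalogue.json.gz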
const getCatalogue = async () => {
  console.log('Fetching catalogue...')
  let res = null
  // Exponential backoff: start at 500 ms and double up to a 60 s cap
  let waitTime = 500
  for (let i = 0; i < 100; i++) {
    try {
      res = await fetchProgress('https://ec.europa.eu/eurostat/api/dissemination/catalogue/toc/xml')
      break
    } catch (error) {
      await new Promise(resolve => setTimeout(resolve, waitTime))
      waitTime = Math.min(waitTime * 2, 60_000)
    }
  }
  if (!res) {
    throw new Error('Failed to fetch catalogue')
  }
  const catalogues = await res.text()
  const parser = new XMLParser({
    ignoreAttributes: false,
  })
  // XMLParser#parse takes only the XML string (no MIME type, unlike DOMParser)
  const doc = parser.parse(catalogues)
  // Only the first top-level theme branch of the TOC is processed
  const themes = doc['nt:tree']['nt:branch'][0]

  const root = {}
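  // Recursively copy the tree: branches become { title, children }, leaves
  // become { title, code }. fast-xml-parser yields an object for a single
  // child and an array for repeated children, so both shapes are normalized
  // to arrays.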
  const getLeafs = (node, obj) => {
    obj.title = node['nt:title'][0]['#text']
    obj.children = []
    const branches = [].concat(node['nt:children']['nt:branch'] ?? [])
    if (branches.length > 0) {
      for (const branch of branches) {
        const child = {}
        obj.children.push(child)
        getLeafs(branch, child)
      }
    } else {
      const leafs = [].concat(node['nt:children']['nt:leaf'] ?? [])
      for (const leaf of leafs) {
        obj.children.push({
          title: leaf['nt:title'][0]['#text'],
          code: leaf['nt:code'],
        })
      }
    }
  }

  getLeafs(themes, root)

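  // gzip the JSON via the web streams API and write it next to this script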
  const readableStream = new Blob([JSON.stringify(root)]).stream()
  const compressedStream = readableStream.pipeThrough(new CompressionStream('gzip'))
  const arrayBuffer = await new Response(compressedStream).arrayBuffer()
  await fs.writeFile(path.join(basePath, 'catalogue.json.gz'), Buffer.from(arrayBuffer))
}

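// Download the Eurostat metabase (dataset -> dimension -> codes), convert it
// to JSON and write it as metabase.json.gz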
const getMetabase = async () => {
  console.log('Fetching metabase...')
  let res = null
  // Exponential backoff: start at 500 ms and double up to a 60 s cap
  let waitTime = 500
  for (let i = 0; i < 100; i++) {
    try {
      res = await fetchProgress('https://ec.europa.eu/eurostat/api/dissemination/catalogue/metabase.txt.gz')
      break
    } catch (error) {
      await new Promise(resolve => setTimeout(resolve, waitTime))
      waitTime = Math.min(waitTime * 2, 60_000)
    }
  }
  if (!res) {
    throw new Error('Failed to fetch metabase')
  }
  // The file is served as a plain .gz, so decompress it manually
  const decompressedStream = res.body.pipeThrough(new DecompressionStream('gzip'))
  const text = await new Response(decompressedStream).text()

  const data = {}
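  // Each line is "<dataset>\t<dimension>\t<code>"; collect the codes per
  // dataset and dimension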
  for (const line of text.split('\n')) {
    if (line.trim() === '') {
      continue
    }
    const [dataset, dimension, code] = line.split('\t')
    if (!data[dataset]) {
      data[dataset] = {}
    }
    if (!data[dataset][dimension]) {
      data[dataset][dimension] = []
    }
    data[dataset][dimension].push(code)
  }

  // gzip the JSON via the web streams API and write it next to this script
  const readableStream = new Blob([JSON.stringify(data)]).stream()
  const compressedStream = readableStream.pipeThrough(new CompressionStream('gzip'))
  const arrayBuffer = await new Response(compressedStream).arrayBuffer()
  await fs.writeFile(path.join(basePath, 'metabase.json.gz'), Buffer.from(arrayBuffer))
}

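// Fetch both files sequentially; each call throws after exhausting its retries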
await getMetabase()
await getCatalogue()