FjordWarden
FjordWarden16mo ago

Read CSV files inside zip file using the web streams API

I need to read a number of csv files inside a zip file. The basic example I've got working so far is the following:
import {BlobReader,ZipReader,ZipWriter} from 'https://deno.land/x/zipjs/index.js'
import {readCSV} from 'https://deno.land/x/csv/mod.ts'
import {CsvParseStream} from 'https://deno.land/std@0.201.0/csv/mod.ts'

// Open the archive and list its entries; only entries whose filename starts
// with 'code' are parsed as CSV and have their parsed chunks logged.
const srcFile = await Deno.open('/data.zip')
const zipReader = new ZipReader(srcFile)
for (let entry of (await zipReader.getEntries())) {
console.log('filename ', entry.filename)
// A parser is created for every entry, even those skipped by the filter below.
let csvStream = new CsvParseStream({skipFirstRow:false,columns:undefined})
if (entry.filename.startsWith('code')) {
// NOTE(review): getData() presumably writes raw bytes (Uint8Array chunks),
// while CsvParseStream presumably expects string chunks -- piping through a
// TextDecoderStream first may be what is missing; confirm against the std/csv docs.
// NOTE(review): the write is awaited to completion before anything reads
// csvStream.readable; this may stall on backpressure for large entries -- verify.
await entry.getData(csvStream.writable)
for await (const chunk of csvStream.readable) {
console.log(chunk)
}
}
}
import {BlobReader,ZipReader,ZipWriter} from 'https://deno.land/x/zipjs/index.js'
import {readCSV} from 'https://deno.land/x/csv/mod.ts'
import {CsvParseStream} from 'https://deno.land/std@0.201.0/csv/mod.ts'

// Open the archive and list its entries; only entries whose filename starts
// with 'code' are parsed as CSV and have their parsed chunks logged.
const srcFile = await Deno.open('/data.zip')
const zipReader = new ZipReader(srcFile)
for (let entry of (await zipReader.getEntries())) {
console.log('filename ', entry.filename)
// A parser is created for every entry, even those skipped by the filter below.
let csvStream = new CsvParseStream({skipFirstRow:false,columns:undefined})
if (entry.filename.startsWith('code')) {
// NOTE(review): getData() presumably writes raw bytes (Uint8Array chunks),
// while CsvParseStream presumably expects string chunks -- piping through a
// TextDecoderStream first may be what is missing; confirm against the std/csv docs.
// NOTE(review): the write is awaited to completion before anything reads
// csvStream.readable; this may stall on backpressure for large entries -- verify.
await entry.getData(csvStream.writable)
for await (const chunk of csvStream.readable) {
console.log(chunk)
}
}
}
The problem is that each chunk is a huge array of numeric strings (apparently raw byte values) rather than the parsed CSV row I would expect.
[ "34", "67", "97", ...1356838 more items
[ "34", "67", "97", ...1356838 more items
Any help would be welcome.
1 Reply
FjordWarden
FjordWardenOP16mo ago
I've managed to make the example self-contained, so it can run without a zip file. But I still haven't found a way to read the unzipped file as CSV. I should be able to wrap the stream somehow, but the streams API is very confusing.
import {Uint8ArrayWriter,Uint8ArrayReader,TextReader,ZipReader,ZipWriter} from 'https://deno.land/x/zipjs/index.js'
import {readCSV,CSVReader} from 'https://deno.land/x/csv/mod.ts'
import {CsvParseStream} from 'https://deno.land/std@0.201.0/csv/mod.ts'


/**
 * Reads a byte stream to completion and returns its contents decoded as UTF-8.
 *
 * Every chunk is consumed until the stream reports `done`, so data delivered
 * in several chunks is not truncated, and multi-byte characters split across
 * chunk boundaries are decoded correctly.
 *
 * @param {ReadableStream<Uint8Array>} stream - Unlocked stream of byte chunks.
 * @returns {Promise<string>} The decoded text; '' when the stream is empty.
 */
async function streamToString(stream) {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  try {
    while (true) {
      const {done, value} = await reader.read()
      if (done) break
      // stream:true keeps a trailing partial code point buffered for the next chunk.
      text += decoder.decode(value, {stream: true})
    }
    // Final call without stream:true flushes anything still buffered.
    text += decoder.decode()
  } finally {
    // Release the lock so the stream is not left permanently locked.
    reader.releaseLock()
  }
  return text
}

// Self-contained round trip: build a zip in memory with two small CSV
// entries, then read it back and log each entry's text.
const zipWriter = new ZipWriter(new Uint8ArrayWriter());
await Promise.all([
zipWriter.add("a.csv", new TextReader("a,b,c\nd,e,f")),
zipWriter.add("b.csv", new TextReader("g,h,i\nj,k,l"))])
// close() resolves with the written archive (presumably a Uint8Array, given
// the Uint8ArrayWriter above -- confirm against the zip.js docs).
const zipFile = await zipWriter.close()
const zipReader = new ZipReader(new Uint8ArrayReader(zipFile))
for (const entry of await zipReader.getEntries()) {
console.log('file ', entry.filename)
// A TransformStream built with no arguments passes chunks through unchanged;
// it is used here only to get a connected writable/readable pair.
const transformStream = new TransformStream()
// Start reading before writing so a read is already pending when getData() writes.
const promiseText = streamToString(transformStream.readable)
await entry.getData(transformStream.writable)
const text = await promiseText
console.log(text)
}
// NOTE(review): the process reportedly hangs after this point; presumably
// zip.js web workers are still alive -- calling zip.js's terminateWorkers()
// (or disabling workers via configure) may be required; verify.
await zipReader.close()
import {Uint8ArrayWriter,Uint8ArrayReader,TextReader,ZipReader,ZipWriter} from 'https://deno.land/x/zipjs/index.js'
import {readCSV,CSVReader} from 'https://deno.land/x/csv/mod.ts'
import {CsvParseStream} from 'https://deno.land/std@0.201.0/csv/mod.ts'


/**
 * Reads a byte stream to completion and returns its contents decoded as UTF-8.
 *
 * Every chunk is consumed until the stream reports `done`, so data delivered
 * in several chunks is not truncated, and multi-byte characters split across
 * chunk boundaries are decoded correctly.
 *
 * @param {ReadableStream<Uint8Array>} stream - Unlocked stream of byte chunks.
 * @returns {Promise<string>} The decoded text; '' when the stream is empty.
 */
async function streamToString(stream) {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  try {
    while (true) {
      const {done, value} = await reader.read()
      if (done) break
      // stream:true keeps a trailing partial code point buffered for the next chunk.
      text += decoder.decode(value, {stream: true})
    }
    // Final call without stream:true flushes anything still buffered.
    text += decoder.decode()
  } finally {
    // Release the lock so the stream is not left permanently locked.
    reader.releaseLock()
  }
  return text
}

// Self-contained round trip: build a zip in memory with two small CSV
// entries, then read it back and log each entry's text.
const zipWriter = new ZipWriter(new Uint8ArrayWriter());
await Promise.all([
zipWriter.add("a.csv", new TextReader("a,b,c\nd,e,f")),
zipWriter.add("b.csv", new TextReader("g,h,i\nj,k,l"))])
// close() resolves with the written archive (presumably a Uint8Array, given
// the Uint8ArrayWriter above -- confirm against the zip.js docs).
const zipFile = await zipWriter.close()
const zipReader = new ZipReader(new Uint8ArrayReader(zipFile))
for (const entry of await zipReader.getEntries()) {
console.log('file ', entry.filename)
// A TransformStream built with no arguments passes chunks through unchanged;
// it is used here only to get a connected writable/readable pair.
const transformStream = new TransformStream()
// Start reading before writing so a read is already pending when getData() writes.
const promiseText = streamToString(transformStream.readable)
await entry.getData(transformStream.writable)
const text = await promiseText
console.log(text)
}
// NOTE(review): the process reportedly hangs after this point; presumably
// zip.js web workers are still alive -- calling zip.js's terminateWorkers()
// (or disabling workers via configure) may be required; verify.
await zipReader.close()
Also, this example hangs after executing.