Papa Parse
Parse CSV, TSV, or any other delimiter-separated values file or string in JavaScript.
Warning! Contains Strong Language... & A Bunch Of Bad Rap Tropes
Introduction
Papa Parse can convert a CSV, TSV, or other delimiter-separated values file into JavaScript arrays or objects. Now you can work with /.*SV/ files client-side or server-side in web applications.
What This Tutorial Covers
What This Tutorial Covers
- Using Papa Parse
- The New Transform Function
- Benchmarking
What You Need For This Tutorial
What You Need For This Tutorial
A browser
Using Papa Parse
To use Papa Parse, import the following file:
<script src="https://cdn.rawgit.com/mholt/PapaParse/2f1eb8c4/papaparse.min.js"></script>
To parse a CSV or other separated-value file, pass the delimited string to Papa.parse(). The second argument, the options object, is optional; the example below shows some of the more important options.
// Sample CSV input: one header row followed by three data rows that mix
// quoted fields, escaped quotes ("") and embedded commas.
let csvStr = `ONE,TWO,THREE,FOUR,FIVE
"escape""quote","escape,comma",no quotes,"double""""quote","quote"",comma"
true,123,null,undefined,"[false,456,null]"
true,123,null,undefined,{"one":"two"}`;

// Parser configuration, kept in its own object so each setting can be
// annotated and reused.
const parseOptions = {
  delimiter: "",        // empty string: let Papa Parse auto-detect the delimiter
  newline: "",          // empty string: let Papa Parse auto-detect the line ending
  quoteChar: '"',       // character that wraps quoted fields
  escapeChar: '"',      // character that escapes a quote inside a quoted field
  header: false,        // set to true to get rows as {head: value} objects instead of arrays
  dynamicTyping: false, // set to true to convert values to numbers where possible
  skipEmptyLines: true
};

// Hand the CSV text and the options to Papa Parse.
let csv = Papa.parse(csvStr, parseOptions);

// The parsed rows (arrays of CSV fields) live on the result's `data` property.
console.log(csv.data);
The New Transform Function
I added a new `transform` option to Papa Parse: a function that is applied to each parsed value.
Below is an example where a transform function attempts to convert CSV fields to JavaScript values.
Benchmarking
You can get an idea of how fast Papa Parse will parse your CSV by passing some dummy data through it and timing it. I created the following function to generate random CSV strings.
/**
 * Generator of random CSV test data. Must be called with `new`.
 *
 * Each instance exposes four methods:
 *   - randoType():         one random entry from a fixed list of type-like tokens
 *   - randoString(length): `length` random characters from [A-Za-z0-9]
 *   - randoNumber(length): `length` random decimal digits, returned as a string
 *   - randoCsv(lines):     `lines` newline-terminated CSV rows of random fields
 */
function Randos() {
  // These two MUST be declared. Assigning to undeclared names creates
  // implicit globals shared by every instance in sloppy mode and throws a
  // ReferenceError in strict mode / ES modules.
  const _this = this;
  const _private = {};

  _private.randomTypeChoices = ["[1,2,3]",'"{""one"":1,""two"":true,""three"":null}"',"null","undefined","true","false","True","False","TRUE","FALSE"];
  _private.randomChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  _private.randomNumbers = "0123456789";

  /**
   * Build a string of `length` characters drawn uniformly from `alphabet`.
   * Shared by randoString and randoNumber, which differ only in alphabet.
   * @param {string} alphabet - characters to draw from
   * @param {number} length - number of characters to produce
   * @returns {string} the random string ("" when length is not positive)
   */
  _private.randomFrom = function (alphabet, length) {
    let text = "";
    for (let i = 0; i < length; i++) {
      text += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
    }
    return text;
  };

  /**
   * @returns {string} one randomly chosen entry of the type-token list
   */
  _this.randoType = function () {
    const choices = _private.randomTypeChoices;
    return choices[Math.floor(Math.random() * choices.length)];
  };

  /**
   * @param {number} length - number of characters
   * @returns {string} random alphanumeric string of that length
   */
  _this.randoString = function (length) {
    return _private.randomFrom(_private.randomChars, length);
  };

  /**
   * @param {number} length - number of digits
   * @returns {string} random digit string of that length (leading zeros possible)
   */
  _this.randoNumber = function (length) {
    return _private.randomFrom(_private.randomNumbers, length);
  };

  /**
   * Generate `lines` rows, each built from seven generated fields joined by
   * commas and terminated by "\n".
   *
   * NOTE(review): the "[1,2,3]" type token is unquoted and contains commas,
   * so a row that draws it will parse to more than seven fields. Confirm
   * whether that is intended for the benchmark data.
   *
   * @param {number} lines - number of rows to generate
   * @returns {string} the CSV text ("" when lines is not positive)
   */
  _this.randoCsv = function (lines) {
    const rows = [];
    for (let i = 0; i < lines; i++) {
      const fields = [
        _this.randoType(),
        _this.randoString(8),
        _this.randoString(5),
        _this.randoNumber(4),
        _this.randoString(2),
        _this.randoNumber(21),
        _this.randoType()
      ];
      rows.push(fields.join(',') + '\n');
    }
    return rows.join('');
  };
}
You can use the following code to get performance stats by running dummy data through Papa Parse. In this case, I've set it to generate 1 million rows of 7-column CSV data (you can change the number of columns by editing the randoCsv function). When I ran my own performance tests, Papa Parse could parse 100,000 lines of 7-column data in about 0.35 seconds.
// Benchmark: parse freshly generated random CSV several times and log the
// mean parse time in milliseconds.
//
// Timing uses performance.now() rather than named marks/measures, so no
// entries accumulate in the performance timeline and no mark names can
// collide. Per-run values are block-scoped so they cannot clash with
// top-level names declared by other snippets on the same page (for example
// a top-level `csv`).
const randos = new Randos();
const runs = 10;
const rowsPerRun = 1e6;
let totalMs = 0;

for (let run = 0; run < runs; run++) {
  // Generate the data outside the timed section so only parsing is measured.
  const benchmarkCsv = randos.randoCsv(rowsPerRun);

  const startedAt = performance.now();
  Papa.parse(benchmarkCsv);
  totalMs += performance.now() - startedAt;
}

// Mean duration of one Papa.parse() call, in milliseconds.
console.log(totalMs / runs);
Done!
Happy parsing.