![build](https://gitlab.com/jerplab/xml-stream-js)
![coverage](https://gitlab.com/jerplab/xml-stream-js)
![badge-npmversion](https://www.npmjs.com/package/@jerp/xml-stream-js)
![jsdelivr](https://www.jsdelivr.com/package/npm/@jerp/xml-stream-js)

xml-stream-js

A simple JavaScript XML parser for Node.js and the browser.

It smoothly handles line breaks, spaces, chunked input (from a stream), namespaces...

It is fast and optimized for a Buffer or Uint8Array of encoded strings (UTF-8), but it accepts plain strings too.

Using the parser

Parses some (or all) XML elements. It uses the tokenizer described below but facilitates state management (in between 2 writes).

$3

To get started.

#### Example 1

``javascript const aString = 'some text a0more text' const docParser = new DocumentParser(new Tokenizer()) // parsing the whole document (this might not the best usecase for this library) docParser.onRoot(XmlElementParser()) // parser preserving child node order docParser.write(aString) const a = docParser.next() // XmlElement a.toString() // produces back this xml string a.getAttribute('id') === 'a0'`

#### Example 2

`javascript const a0String = 'some text a0 & more text' const a1String = 'some text a1' const b0String = 'some text b0' const docParser = new DocumentParser(new Tokenizer()) docParser.on('root/a', XmlToObject()) docParser.on('root/b', XmlElementParser()) docParser.write(${a0String}${a1String}${b0String}) const a0 = docParser.next() // object const a1 = docParser.next() // object const b = docParser.next() // XmlElement const u = docParser.next() // undefined a0.id === 'a0' a0.aa[0].id === 'aa0'`

`$3`

`javascript const xmlStr = '...' const docParser = new DocumentParser(new Tokenizer()) docParser.on('root/a', { onStart(startTag) { // object representingareturn { title: startTag.getAttribute('title'), items: [], } }, onText(text, a) { if (!a.firstText) a.firstText = text.textContent.trim() }, onEnd: (a) => a, // this is the object returned bydocParser.next()onChild(startTag) { switch (startTag.tagName) { case 'aa': { // returning a new parser for 'aa' return { onStart(startTag, parentCtx) { parentCtx.items.push({ name: startTag.getAttribute('id'), label: startTag.getAttribute('label'), type: 'aa', }) return false // skipping child nodes ofaa}, } } // only interesed inaa children of adefault: return false } }, }) docParser.write(xmlStr) console.log(docParser.next()) { title: '...', firstText: 'some text a0', items: [ { type: 'aa', name: 'aa00', label: 'item aa00' }, { type: 'aa', name: 'aa01', label: 'item aa01' }, ], }`

`Using the tokenizer`

`javascript import { Tokenizer } from '@jerp/xml-stream-js' const tokenizer = new Tokenizer() let token // any of StartTag | EndTag | Text | CDATA | undefined (undifined meaning end-of-chunk) const tokens = [] // collected tokens try { // write the first chunk of the xml string tokenizer.write('someinn') while ((token = tokenizer.nextToken())) { tokens.push(token) } // write the last chunk of the xml string tokenizer.write('ertext') while ((token = tokenizer.nextToken())) { tokens.push(token) } } catch (e) { // will not happen in this case, but will if xml string is corrupted } tokens[0].tagName === 'a' tokens[1].getAttribute('b1') === 'value b1' tokens.join('') === 'someinnertext' tokenizer.exhausted === true // the whole string has been consumed tokens.join('') // === 'someinnertext'``

Using the parser

Parses some (or all) XML elements. It uses the tokenizer described below but facilitates state management (in between 2 writes).

To get started.

#### Example 1

javascript
const aString = 'some text a0more text'
const docParser = new DocumentParser(new Tokenizer())
// parsing the whole document (this might not be the best use case for this library)
docParser.onRoot(XmlElementParser()) // parser preserving child node order
docParser.write(aString)
const a = docParser.next() // XmlElement
a.toString() // produces back this xml string
a.getAttribute('id') === 'a0'

#### Example 2

javascript
const a0String = 'some text a0 & more text'
const a1String = 'some text a1'
const b0String = 'some text b0'
const docParser = new DocumentParser(new Tokenizer())
docParser.on('root/a', XmlToObject())
docParser.on('root/b', XmlElementParser())
docParser.write(`${a0String}${a1String}${b0String}`)
const a0 = docParser.next() // object
const a1 = docParser.next() // object
const b = docParser.next() // XmlElement
const u = docParser.next() // undefined
a0.id === 'a0'
a0.aa[0].id === 'aa0'

#### Example 3

javascript
const xmlStr = '...'
const docParser = new DocumentParser(new Tokenizer())
docParser.on('root/a', {
  onStart(startTag) {
    // object representing a
    return {
      title: startTag.getAttribute('title'),
      items: [],
    }
  },
  onText(text, a) {
    if (!a.firstText) a.firstText = text.textContent.trim()
  },
  onEnd: (a) => a, // this is the object returned by docParser.next()
  onChild(startTag) {
    switch (startTag.tagName) {
      case 'aa': {
        // returning a new parser for 'aa'
        return {
          onStart(startTag, parentCtx) {
            parentCtx.items.push({
              name: startTag.getAttribute('id'),
              label: startTag.getAttribute('label'),
              type: 'aa',
            })
            return false // skipping child nodes of aa
          },
        }
      }
      // only interested in aa children of a
      default:
        return false
    }
  },
})
docParser.write(xmlStr)
console.log(docParser.next())
{
  title: '...',
  firstText: 'some text a0',
  items: [
    { type: 'aa', name: 'aa00', label: 'item aa00' },
    { type: 'aa', name: 'aa01', label: 'item aa01' },
  ],
}

Using the tokenizer

javascript
import { Tokenizer } from '@jerp/xml-stream-js'
const tokenizer = new Tokenizer()
let token // any of StartTag | EndTag | Text | CDATA | undefined (undefined meaning end-of-chunk)
const tokens = [] // collected tokens
try {
  // write the first chunk of the xml string
  tokenizer.write('someinn')
  while ((token = tokenizer.nextToken())) {
    tokens.push(token)
  }
  // write the last chunk of the xml string
  tokenizer.write('ertext')
  while ((token = tokenizer.nextToken())) {
    tokens.push(token)
  }
} catch (e) {
  // will not happen in this case, but will if xml string is corrupted
}
tokens[0].tagName === 'a'
tokens[1].getAttribute('b1') === 'value b1'
tokens.join('') === 'someinnertext'
tokenizer.exhausted === true // the whole string has been consumed
tokens.join('') // === 'someinnertext'