Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
language: js
title: Example EF data for basic features for a single page
{  "id":"loc.ark:/13960/t1fj34w02",
   "metadata":{
      "schemaVersion":"1.2",
      "dateCreated":"2015-02-12T13:30",
      "title":"Shakespeare's Romeo and Juliet,",
      "pubDate":"1920",
      "language":"eng",
      "htBibUrl":"http://catalog.hathitrust.org/api/volumes/full/htid/loc.ark:/13960/t1fj34w02.json",
      "handleUrl":"http://hdl.handle.net/2027/loc.ark:/13960/t1fj34w02",
      "oclc":"",
      "imprint":"Scott Foresman and company, [c1920]"
   },
   "features":{
      "schemaVersion":"2.0",
      "dateCreated":"2015-02-20T11:31",
      "pageCount":230,
      "pages":[
        {"seq":"00000015",
          "tokenCount":212,
          "lineCount":38,
          "emptyLineCount":10,
          "sentenceCount":7,
          "languages":[{"en":"1.00"}],
          "header":{
             "tokenCount":7,
             "lineCount":3,
             "emptyLineCount":1,
             "sentenceCount":1,
             "tokenPosCount":{
                "I.":{"NN":1},
                "THE":{"DT":1},
                "INTRODUCTION":{"NN":1},
                "DRAMA":{"NNPS":1},
                "SHAKESPEARE":{"NNP":1},
                "ENGLISH":{"NNP":1},
                "AND":{"CC":1}}},
          "body":{
             "tokenCount":205,
             "lineCount":35,
             "emptyLineCount":9,
             "sentenceCount":6,
             "tokenPosCount":{
                "striking":{"JJ":1},
                "his":{"PRP$":1},
                 "plays":{"NNS":1},
                "London":{"NNP":1},
                "four":{"CD":1},
                ".":{".":7},
                "dramatic":{"JJ":2},
                "1576":{"CD":1},
                "stands":{"VBZ":1},
                ...
                "growth":{"NN":1}
             }
          },
          "footer":{
             "tokenCount":0,
             "lineCount":0,
             "emptyLineCount":0,
                    "sentenceCount":0,
                    "tokenPosCount":{}}}]}}

 

Simple use cases for the EF (extracted features) data in analysis at scale