22/27: gnu: Add Lingua-EN-Tagger.

From: Eric Bavier
Subject: 22/27: gnu: Add Lingua-EN-Tagger.
Date: Tue, 17 Mar 2015 18:15:25 +0000

bavier pushed a commit to branch master
in repository guix.

commit d7de088395e10ec0f35584afd50bc63b3a6d18c6
Author: Eric Bavier <address@hidden>
Date:   Tue Mar 17 11:03:31 2015 -0500

    gnu: Add Lingua-EN-Tagger.
    * gnu/packages/language.scm (perl-lingua-en-tagger): New variable.
 gnu/packages/language.scm |   33 ++++++++++++++++++++++++++++++++-
 1 files changed, 32 insertions(+), 1 deletions(-)

diff --git a/gnu/packages/language.scm b/gnu/packages/language.scm
index fff651b..2c080cc 100644
--- a/gnu/packages/language.scm
+++ b/gnu/packages/language.scm
@@ -19,9 +19,10 @@
 (define-module (gnu packages language)
   #:use-module (gnu packages)
   #:use-module (gnu packages perl)
+  #:use-module (gnu packages web)
   #:use-module (guix packages)
   #:use-module (guix build-system perl)
-  #:use-module ((guix licenses) #:select (gpl2))
+  #:use-module ((guix licenses) #:select (gpl2 gpl3))
   #:use-module (guix download))
 (define-public perl-lingua-en-findnumber
@@ -114,6 +115,36 @@ converting a word to singular or plural.")
 digits, is a cardinal or ordinal number.")
     (license (package-license perl))))
+(define-public perl-lingua-en-tagger
+  (package
+    (name "perl-lingua-en-tagger")
+    (version "0.24")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "mirror://cpan/authors/id/A/AC/ACOBURN/"
+                           "Lingua-EN-Tagger-" version ".tar.gz"))
+       (sha256
+        (base32
+         "0qksqh1zi8fz76a29s2ll4g6yr8y6agmzgq7ngccvgj3gza5q241"))))
+    (build-system perl-build-system)
+    (propagated-inputs
+     `(("perl-memoize-expirelru" ,perl-memoize-expirelru)
+       ("perl-lingua-stem" ,perl-lingua-stem)
+       ("perl-html-parser" ,perl-html-parser)
+       ("perl-html-tagset" ,perl-html-tagset)))
+    (home-page "http://search.cpan.org/dist/Lingua-EN-Tagger")
+    (synopsis "Part-of-speech tagger for English natural language processing")
+    (description "This module is a probability based, corpus-trained tagger
+that assigns part-of-speech tags to English text based on a lookup dictionary
+and a set of probability values.  The tagger assigns appropriate tags based on
+conditional probabilities - it examines the preceding tag to determine the
+appropriate tag for the current word.  Unknown words are classified according
+to word morphology or can be set to be treated as nouns or other parts of
+speech.  The tagger also extracts as many nouns and noun phrases as it can,
+using a set of regular expressions.")
+    (license gpl3)))
 (define-public perl-lingua-en-words2nums
     (name "perl-lingua-en-words2nums")

