koha-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Koha-cvs] CVS: koha/misc/migration_tools build_authorities.pl,NONE,1.1.


From: Paul POULAIN
Subject: [Koha-cvs] CVS: koha/misc/migration_tools build_authorities.pl,NONE,1.1.2.1
Date: Thu, 11 Aug 2005 06:51:35 -0700

Update of /cvsroot/koha/koha/misc/migration_tools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27750/misc/migration_tools

Added Files:
      Tag: rel_2_2
        build_authorities.pl 
Log Message:
new script to rebuild authorities after a bulkmarcimport.pl

--- NEW FILE ---
#!/usr/bin/perl
# script that rebuild thesaurus from biblio table.

use strict;

# Koha modules used
use MARC::File::USMARC;
use MARC::Record;
use MARC::Batch;
use C4::Context;
use C4::Biblio;
use C4::AuthoritiesMarc;
use Time::HiRes qw(gettimeofday);

use Getopt::Long;
my ( $input_marc_file, $number) = ('',0);
my ($version, $verbose, $delete, $confirm, $howmany);
GetOptions(
    'h' => \$version,
    'd' => \$delete,
    'v' => \$verbose,
    'c' => \$confirm,
# this $howmany parameter & other commented code was here to enable incremental 
building of the authorities, but it does not work well.
#       'n:s' => \$howmany,
);

if ($version || (!$confirm)) {
        print <<EOF
Script to recreate a authority tables into Koha from biblios
parameters :
\th : this version/help screen
\tc : confirm. this script run without -c shows this help, pls run it with -c 
to execute it
\tv : verbose mode.
\td : delete the thesaurus before doing work. This deleting is smart enough to 
delete only the categories to rebuild. However, it is quite slow. Don''t be 
surprised...
\tn X : do only X entries, then stop. As the script is incremental, you can 
create authorities X by X until done.

BEFORE RUNNING this script, you MUST edit it & adapt the %whattodo hash to fit 
your needs. It contains :
* as key, the code of the authority to be created. It's the one you've choosen 
(or will choose) in Koha >> parameters >> thesaurus structure >> add). It can 
be whatever you want. NP/CO/NG/TI/NC in CVS refers to UNIMARC french RAMEAU 
category codes.
* in values a sub-hash with the following values :
\ttaglist : the list of MARC tags using this authority
\tkey : the list of MARC subfields used as key for authority. 2 entries in 
biblio having the same key will be considered as the same.
\tother : the list of MARC subfields not used as key, but to be copied in 
authority.
\tauthtag : the field in authority that will be reported in biblio. Remember 
that all subfields in tag "authtag" will be reported in the same subfield of 
the biblio (in MARC tags that are in "taglist")

don't forget to define the itemfield. In UNIMARC, it should be 995, in MARC21, 
probably 852

Any warning will be stored in the warnings.log file.
EOF
;#'
die;
}

my $dbh = C4::Context->dbh;

my $itemfield = '995'; # enter the TAG number where your items are stored.

my %whattodo = (
        # authority code (the one you've choosen (or will choose) in Koha >> 
parameters >> thesaurus structure >> add)
        NP =>   {
                                # the list of MARC tags using this authority
                                taglist => "600|700|701|702",
                                # the list of MARC subfields used as key for 
authority. 2 entries in biblio having the same key will be considered as the 
same.
                                key             => "a|b|c|d|f|x|y|z",
                                # the list of MARC subfields not used as key, 
but to be copied in authority.
                                other   => "j",
                                # the field in authority that will be reported 
in biblio. Remember that all subfields in tag "authtag" will be reported in the 
same subfield of the biblio (in MARC tags that are in "taglist")
                                authtag => "200",
                        },
        CO =>   {       taglist => "601|710|711|712",
                                key             => "a|b",
                                other   => "c|d|f|g|p",
                                authtag => "210",
                        },
        NG =>   {       taglist => "607",
                                key             => "a|x|y|z",
                                other   => "",
                                authtag => "215",
                        },
        TU =>   {       taglist => "500|605",
                                key             => "a|i|x|k|l|m|n|q|y|z",
                                other   => "",
                                authtag => "230",
                        },
        NC =>   {       taglist => "606",
                                key             => "a|x|y|z",
                                other   => "",
                                authtag => "250",
                        },
);
my %authorities;

open WARNING_FILE,">","warnings.log";

my $field_list;
my $category_list;
foreach (keys %whattodo) {
        $field_list .= $whattodo{$_}->{taglist}.'|';
        $category_list.= "'".$_."',"
}
chop $field_list;

if ($delete) {
        print "deleting thesaurus step 1\n";
        chop $category_list;
        my $del1 = $dbh->prepare("delete from auth_subfield_table where 
authid=?");
        my $del2 = $dbh->prepare("delete from auth_word where authid=?");
        my $sth = $dbh->prepare("select authid from auth_header where 
authtypecode in ($category_list)");
        $sth->execute;
        while (my ($authid) = $sth->fetchrow) {
                $del1->execute($authid);
                $del2->execute($authid);
        }
        print "deleting thesaurus step 2\n";
        $dbh->do("delete from auth_header where authtypecode in 
($category_list)");
        $dbh->do("delete from marc_subfield_table where tag in 
('".join("','",split('\|',$field_list))."') and subfieldcode='9'");
        $dbh->do("delete from marc_word where tagsubfield in 
('".join("9','",split('\|',$field_list))."9')");
#       die;
}

$|=1; # flushes output
my $starttime = gettimeofday;
my $sth = $dbh->prepare("select bibid from marc_biblio");
$sth->execute;
my $i=1;

my $modified;
my $alreadydone;
my $totalskipped;
while (my ($bibid) = $sth->fetchrow) {
        my $record = MARCgetbiblio($dbh,$bibid);
        $modified=0;
        $i++;
#       print "i : $i / howmany : $howmany\n";
#       exit if $i>$howmany;
        
        # skip what has already been done...
#       $alreadydone=0;
#       foreach my $field ($record->fields) {
#               if ($field->tag() =~ /$field_list/) {
# #                     print "F : $field_list ".$field->tag()." => 
".$field->as_formatted."\n";
#                       if ($field->subfield('9')) {
#                               $alreadydone++;
#                       } else {
#                               if ($alreadydone) {
#                                       print "ERROR : biblio partially done, 
some \$9 (authority link) missing : ".$record->as_formatted."\n======= You 
should run the script again using -d to delete everything";
#                                       die;
#                               }
#                       }
#               }
#       }
#       $totalskipped++ if $alreadydone;
#       next if $alreadydone;
#       my $timeneeded = gettimeofday - $starttime;
        print " $i in ".(gettimeofday-$starttime)." s\n" unless ($i % 100);
        # be careful, as the last entry may have been 
        # delete ITEM field, we only deal with BIBLIOS
        foreach my $field ($record->field($itemfield)) {
                $record->delete_field($field);
        }
        my $totdone=0;
#       my $authid;
        # on passe tous les champs
        foreach my $field ($record->fields) {
                foreach my $DOauthtype (keys %whattodo) {
                        my $DOtaglist = $whattodo{$DOauthtype}->{taglist};
                        my $DOkey = $whattodo{$DOauthtype}->{key};
                        my $DOother = $whattodo{$DOauthtype}->{other};
                        my $DOauthtag = $whattodo{$DOauthtype}->{authtag};
                        if ($field->tag() =~ /$DOtaglist/) {
                                # try to find the authority in %NP ...
                                # build the "key"
                                my $authPrimaryKey;
                                foreach (split '\|',$DOkey) {
                                        $authPrimaryKey .= 
join('|',$field->subfield($_))."|" if $field->subfield($_);
                                }
                                # if authority exist, check it can't be 
completed by subfields not previously seen.
                                # otherwise, create if with whatever available.
                                if 
($authorities{$DOauthtype}->{$authPrimaryKey}) {
                                        # check that the existing authority has 
all the datas. Otherwise, add them, but don't modify already parsed biblios.
                                        # at the end of the script, all 
authorities will be updated. So, the "merge_authority.pl" tool can be used to 
update all biblios.
                                        foreach my $subfieldtotest (split 
'\|',$DOother) {
                                                if 
($field->subfield($subfieldtotest)) {
                                                        if 
($authorities{$DOauthtype}->{$authPrimaryKey}->{record}->field($DOauthtag)->subfield($subfieldtotest)
 
                                                                                
                ne
                                                                                
                        $field->subfield($subfieldtotest)) {
                                                                print 
WARNING_FILE "========\nERROR ON $i $subfieldtotest authorities seems to 
differ, can't choose between : 
\n".$authorities{$DOauthtype}->{$authPrimaryKey}->{record}->field($DOauthtag)->as_formatted()."
 \n====== AND ======\n ".$field->as_formatted()."\n=======\n";
                                                                print "W";
                                                        }
                                                        # $c was not here, add 
it...
                                                        unless 
($authorities{$DOauthtype}->{$authPrimaryKey}->{record}->field($DOauthtag)->subfield($subfieldtotest))
 {
                                                                my $fieldA= 
$authorities{$DOauthtype}->{$authPrimaryKey}->{record}->field($DOauthtag)->clone();
                                                                
$fieldA->add_subfields($subfieldtotest => $field->subfield($subfieldtotest));
                                                                
$authorities{$DOauthtype}->{$authPrimaryKey}->{record}->field($DOauthtag)->replace_with($fieldA);
                                                                
$authorities{$DOauthtype}->{$authPrimaryKey}->{modified} = 1;
                                                        }
                                                }
                                        }
                                } else {
                                        my $authrecord = MARC::Record->new();
                                        my $authfield;
                                        foreach (split '\|',$DOkey) {
                                                if ($authfield) {
                                                        
$authfield->add_subfields($_ => join ('|',$field->subfield($_))) if 
$field->subfield($_);
                                                } else {
                                                        $authfield = 
MARC::Field->new( $DOauthtag,'','',$_ => join ('|',$field->subfield($_)));
                                                }
                                        }
                                        foreach (split '\|',$DOother) {
                                                if ($authfield) {
                                                        
$authfield->add_subfields($_ => join ('|',$field->subfield($_))) if 
$field->subfield($_);
                                                } else {
                                                        $authfield = 
MARC::Field->new( $DOauthtag,'','',$_ => join ('|',$field->subfield($_)));
                                                }
                                        }
                                        
$authrecord->insert_fields_ordered($authfield);
                                        my $authid = 
AUTHaddauthority($dbh,$authrecord,'',$DOauthtype);
                                        
$authorities{$DOauthtype}->{$authPrimaryKey}->{authid} = $authid;
                                        
$authorities{$DOauthtype}->{$authPrimaryKey}->{record} = $authrecord->clone;
                                        
$authorities{$DOauthtype}->{$authPrimaryKey}->{modified} = 0;
                                }
                                print "ERROR !!!! \$9 already exists in 
$authPrimaryKey / ".$field->as_formatted."\n" if $field->subfield('9');
                                my $fieldC = $field->clone();
                                $fieldC->add_subfields('9' => 
$authorities{$DOauthtype}->{$authPrimaryKey}->{authid});
                                $field->replace_with($fieldC);
        #                       print $NP{$keyNP}->{authid}." => 
".$record->as_formatted."\n";
                                $modified++;
                        }
                }
        }
#
# NC
#
# OK, done, now store modified biblio if it has been modified
        if ($modified) {
                my $frameworkcode=MARCfind_frameworkcode($dbh,$bibid);
                NEWmodbiblio($dbh,$record,$bibid,$frameworkcode);
#               print "skipped $totalskipped biblios in 
".(gettimeofday-$starttime)." s\n" if $totalskipped;
#               $totalskipped=0;
                print "$modified";
        } else {
                # if $totalskipped is not null, we are in a biblio that has no 
authorities entry, but inside an already done part of the job
                # ++ totalskipped & don't show a useless *
#               if ($totalskipped) {
#                       $totalskipped++;
#               } else {
                        print "*";
#               }
        }
}

#
# now, parse authorities & modify them if they have been modified/completed by 
a subfield not existing on the 1st biblio using this authority.
#
foreach my $authtype (keys %whattodo) {
        foreach my $authentry (keys %{$authorities{$authtype}}) {
#               print "AUTH : $authentry\n" if 
$authorities{$authtype}->{$authentry}->{modified};
                
AUTHmodauthority($dbh,$authorities{$authtype}->{$authentry}->{authid},$authorities{$authtype}->{$authentry}->{record})
 if $authorities{$authtype}->{$authentry}->{modified};
        }
}
#
my $timeneeded = gettimeofday - $starttime;
print "$i entries done in $timeneeded seconds (".($i/$timeneeded)." per 
second)\n";
close WARNING_FILE;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]