...
| |
---|
<container>_id | serial primary key |
publication_id | integer not null |
modified_load_id | integer not null |
status | character(1) (v=valid XML, i=invalid XML) |
content | XML |
The XML is always in the content
(PostgreSQL type: XML) column.
...
select content
from xml.t_publication_reference
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
--------
<publication-reference fvid= "71329085" ucid= "US-5551212-A" >
<document-id>
<country>US</country>
<doc-number> 5551212 </doc-number>
<kind>A</kind>
<date> 19960903 </date>
<lang>EN</lang>
</document-id>
</publication-reference>
|
...
...
Info |
---|
The invaluable utility |
...
...
...
...
...
...
| |
---|
string
xml.f_textContent(xpath::text,xml)
| Extract text of node:
select xml.f_textContent( './document-id/lang' , content)
from xml.t_publication_reference
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
----
EN
|
|
string
xml_f_getAttribute(att::text,xml)
| Extract single attribute value:
select xml.f_getAttribute( 'ucid' , content)
from xml.t_publication_reference
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
----
US- 5551212 -A
|
|
boolean
xml.f_nodeExists(xpath::text,xml)
| Test presence of a particular node:
select xml.f_nodeExists( '//date' , content)
from xml.t_publication_reference
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
----
t
|
|
XML
xml.f_findNode(xpath::text,xml)
| Select a node (type XML) satisfying the given xpath expression:
select xml.f_findNode( './main-classification' , content)
from xml.t_classification_national
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
----
<main-classification mxw-id= "PCL893513162" load-source= "mcf" > 053397 </main-classification>
|
|
XML[]
xml.f_findNodes(xpath::text,xml)
| Select an array of nodes satisfying the given xpath expression:
select xml.f_findNodes( './classification-ipcr' , content)
from xml.t_classifications_ipcr
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
----
{
"<classification-ipcr ..." ,
"<classification-ipcr ..." ,
"<classification-ipcr ..."
}
|
|
XML[]
xml.f_getElementsByTagName(name::text,xml))
| Select an array of nodes based on element name:
select xml.f_getElementsByTagName( 'classification-ipcr' , content)
from xml.t_classifications_ipcr
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
----
{
"<classification-ipcr ..." ,
"<classification-ipcr ..." ,
"<classification-ipcr ..."
}
|
|
string
xml.f_nodeName(xml)
| Select the name of the content node:
select xml.f_nodeName( content )
from xml.t_invention_title
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
-----------------
invention-title
|
|
XML[]
xml.f_childNodes(xml)
| Select all child nodes:
select xml.f_childNodes( content )
from xml.t_application_reference
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
-----------------------------------------------------------------------------------
<document-id mxw-id= "PAPP60364944" load-source= "docdb" format= "epo" >
<country>US</country>
<doc-number> 2582893 </doc-number>
<kind>A</kind>
<date> 19930303 </date>
<lang>EN</lang>
</document-id>
<document-id mxw-id= "PAPP84571955" load-source= "patent-office" format= "original" >
<country>US</country>
<doc-number> 08025828 </doc-number>
<date> 19930303 </date>
<lang>EN</lang>
</document-id>
|
|
string
xml.f_node2string(xml,indent)
| Concatenate all child text nodes:
select xml.f_node2string( content::text, 1 )
from xml.t_abstract
where publication_id=xml.f_ucid2id( 'US-5551212-A' );
-----------------------------------------------------------------------------------
A package is formed by holding a nonextensible strip taut and wrapping it around the upper and/or lower periphery of the bundle and joining the ends of the strip segments e.g. by adhesive bonding. A projecting margin of the strip is folded to overlie the bundle or underlie the bundle so that an L-configuration is imparted to the frame formed around the bundle after the strip is applied thereto.
|
|
...
Now we can create a function that returns a TABLE
of these properties. We'll use publication_id
as input to the function and create it in a private schema
(mySchema).
create or replace function mySchema.f_rdb_citations( integer )
returns table (
publication_id integer,
ref_publication_id integer,
ref_ucid varchar( 64 ),
published date,
country varchar( 2 ),
kind varchar( 2 ),
load_source varchar( 32 ),
format varchar( 32 ),
source varchar( 32 )
) as
$BODY$
declare v_content xml;
v_node xml;
v_doc_node xml;
begin
-- example usage:
-- select * from mySchema.f_rdb_citations( xml.f_ucid2id( 'US-9240001-B2' ) );
select x.publication_id, x.content into publication_id, v_content
from xml.t_citations as x where x.publication_id=$ 1 ;
if not found then
return ;
end if ;
-- loop through all patent citations
for each v_node in array
( select xml.f_findNodes( '//patcit' , v_content) ) loop
select xml.f_getAttribute( 'ucid' , v_node ) into ref_ucid;
select x.publication_id into ref_publication_id
from xml.t_patent_document_values as x
where ucid=xml.f_getAttribute( 'ucid' , v_node );
select xml.f_getAttribute( 'load-source' , v_node ) into load_source;
select xml.f_getAttribute( 'name' , xml.f_findNode( './sources/source' , v_node ) ) into source;
-- loop through all forms of the document-id(s)
for each v_doc_node in array
( select xml.f_findNodes( './document-id' , v_node) ) loop
select xml.f_getAttribute( 'format' , v_doc_node ) into format;
select xml.f_textContent( './country' , v_doc_node ) into country;
select xml.f_textContent( './kind' , v_doc_node ) into kind;
select xml.f_textContent( './date' , v_doc_node ) into published;
return next;
end loop;
end loop;
end;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000 ;
|
...
Now, we test the results:
select * from mySchema.f_rdb_citations( xml.f_ucid2id( 'US-9240001-B2' ) ) limit 5 ;
publication_id | ref_publication_id | ref_ucid | published | country | kind | load_source | format | source
----------------+--------------------+-------------------+------------+---------+------+-------------+--------+--------
160817649 | 5778519 | CA- 2580978 -A1 | 2006 - 03 - 30 | CA | A1 | docdb | epo | APP
160817649 | 127402642 | CN- 201918032 -U | 2011 - 08 - 03 | CN | U | docdb | epo | APP
160817649 | 75847915 | US- 20020150866 -A1 | 2002 - 10 - 17 | US | A1 | docdb | epo | APP
160817649 | 75947470 | US- 20030060942 -A1 | 2003 - 03 - 27 | US | A1 | docdb | epo | APP
160817649 | 76046244 | US- 20030154010 -A1 | 2003 - 08 - 14 | US | A1 | docdb | epo | APP
|
...
Of course, GROUP BY
and ORDER BY
are available on the returned columns. In addition
...
to SELECTing citations for a single document, we can also JOIN
to table expressions, e.g., assume we are interested in the top 5 cited countries from US grant publication date 2016-05-03:
...
select count(*) as n, t2.country as ref_country
from xml.t_patent_document_values t1
inner join mySchema.f_rdb_citations (t1.publication_id) as t2
on (t1.publication_id=t2.publication_id)
where t1.country= 'US'
and t1.published= '2016-05-03'
group by t2.country
order by n desc
limit 5 ;
n | ref_country
--------+---------
221800 | US
14285 | WO
13313 | JP
7115 | EP
2554 | CN |