Changes

Jump to navigation Jump to search
898 bytes added ,  17:21, 1 August 2017
SELECT * FROM firmbase1 WHERE undisclosedflag = 0;
--14145
 
Instead we chose to use only firmname as the key because there were not too many duplicates. We remove the duplicates by selecting the lesser foundingdate.
DROP TABLE firmbaseduplicates;
CREATE TABLE firmbaseduplicates AS
SELECT *, COUNT(*)
FROM (SELECT firmname FROM firmbase1 WHERE undisclosedflag = 0)a
GROUP BY firmname
HAVING COUNT(*) > 1;
--12
 
DROP TABLE firmbaseinclude;
CREATE TABLE firmbaseinclude AS
SELECT f.firmname, MAX(f.foundingdate) AS foundingdate
FROM firmbase1 AS f
INNER JOIN firmbaseduplicates AS d ON f.firmname = d.firmname
GROUP BY f.firmname;
--12
 
DROP TABLE firmbasecore;
CREATE TABLE firmbasecore AS
SELECT l.*
FROM firmbase1 AS l
LEFT JOIN firmbaseinclude AS r ON r.firmname = l.firmname AND r.foundingdate = l.foundingdate
WHERE r.firmname IS NULL AND undisclosedflag = 0;
--14133
 
SELECT COUNT(DISTINCT firmname) FROM firmbasecore;
--14133
==Cleaning fundbase==

Navigation menu