MYSQL-将数据拆分为多行

我已使用从IMDB收集信息并将其传输到MYSQL数据库的应用程序导入了一些数据。

似乎这些字段尚未标准化,并且在1个字段中包含许多值

例如:

Table Movie

MovieID Movie_Title Written_By

1 Movie1 Person1, Person2

2 Movie2 Person3

3 Movie3 Person4, Person2, Person6

有没有办法将这些值分开,然后将它们插入到另一个表中,而不重复呢?

Table Writers

WriterID Written_By MovieId

1 Person1 1

2 Person2 1

3 Person3 3

我进行了一些谷歌搜索,发现我应该使用PHP处理此数据。但是我一点都不了解PHP。

无论如何,仅使用MYSQL即可转换此​​数据?

回答:

您可以使用存储过程,该过程使用游标来解决此问题,但这不是很优雅,但也不是逗号分隔的作者列表!

以下代码是否来自类似问题,但您最好对其进行彻底检查。

希望能帮助到你 :)

mysql> select * from movies_unf;

+---------+-------------+------------------------------------------------------+

| movieID | movie_title | written_by |

+---------+-------------+------------------------------------------------------+

| 1 | movie1 | person1, person2 |

| 2 | movie2 | person3 |

| 3 | movie3 | person4, person2, person6 |

| 4 | movie4 | person4, person4, person1, person2, person1,person8, |

| 5 | movie1 | person1, person2 |

+---------+-------------+------------------------------------------------------+

5 rows in set (0.00 sec)

call normalise_movies_unf();

mysql> select * from movies;

+----------+--------+

| movie_id | title |

+----------+--------+

| 1 | movie1 |

| 2 | movie2 |

| 3 | movie3 |

| 4 | movie4 |

+----------+--------+

4 rows in set (0.00 sec)

mysql> select * from writers;

+-----------+---------+

| writer_id | name |

+-----------+---------+

| 1 | person1 |

| 2 | person2 |

| 3 | person3 |

| 4 | person4 |

| 6 | person6 |

| 12 | person8 |

+-----------+---------+

6 rows in set (0.00 sec)

mysql> select * from movie_writers;

+----------+-----------+

| movie_id | writer_id |

+----------+-----------+

| 1 | 1 |

| 1 | 2 |

| 2 | 3 |

| 3 | 2 |

| 3 | 4 |

| 3 | 6 |

| 4 | 1 |

| 4 | 2 |

| 4 | 4 |

| 4 | 12 |

+----------+-----------+

10 rows in set (0.00 sec)

drop table if exists movies_unf;

create table movies_unf

(

movieID int unsigned not null primary key,

movie_title varchar(255) not null,

written_by varchar(1024) not null

)engine=innodb;

insert into movies_unf values

(1,'movie1','person1, person2'),

(2,'movie2','person3'),

(3,'movie3','person4, person2, person6'),

(4,'movie4','person4, person4, person1, person2, person1,person8,'), -- dodgy writers

(5,'movie1','person1, person2'); -- dodgy movie

drop table if exists movies;

create table movies

(

movie_id int unsigned not null auto_increment primary key,

title varchar(255) unique not null

)engine=innodb;

drop table if exists writers;

create table writers

(

writer_id int unsigned not null auto_increment primary key,

name varchar(255) unique not null

)engine=innodb;

drop table if exists movie_writers;

create table movie_writers

(

movie_id int unsigned not null,

writer_id int unsigned not null,

primary key (movie_id, writer_id)

)engine=innodb;

drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()

begin

declare v_movieID int unsigned default 0;

declare v_movie_title varchar(255);

declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;

declare v_writer_id int unsigned default 0;

declare v_name varchar(255);

declare v_csv_done tinyint unsigned default 0;

declare v_csv_idx int unsigned default 0;

declare v_done tinyint default 0;

declare v_cursor cursor for

select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;

repeat

fetch v_cursor into v_movieID, v_movie_title, v_writers;

set v_movie_title = trim(v_movie_title);

set v_writers = replace(v_writers,' ', '');

-- insert the movie

insert ignore into movies (title) values (v_movie_title);

select movie_id into v_movie_id from movies where title = v_movie_title;

-- split the out the writers and insert

set v_csv_done = 0;

set v_csv_idx = 1;

while not v_csv_done do

set v_name = substring(v_writers, v_csv_idx,

if(locate(',', v_writers, v_csv_idx) > 0,

locate(',', v_writers, v_csv_idx) - v_csv_idx,

length(v_writers)));

set v_name = trim(v_name);

if length(v_name) > 0 then

set v_csv_idx = v_csv_idx + length(v_name) + 1;

insert ignore into writers (name) values (v_name);

select writer_id into v_writer_id from writers where name = v_name;

insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);

else

set v_csv_done = 1;

end if;

end while;

until v_done end repeat;

close v_cursor;

commit;

truncate table movies_unf;

end#

delimiter ;

修改了存储过程,使其不会跳过键值!

drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()

begin

declare v_movieID int unsigned default 0;

declare v_movie_title varchar(255);

declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;

declare v_writer_id int unsigned default 0;

declare v_name varchar(255);

declare v_csv_done tinyint unsigned default 0;

declare v_csv_idx int unsigned default 0;

declare v_done tinyint default 0;

declare v_cursor cursor for

select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;

repeat

fetch v_cursor into v_movieID, v_movie_title, v_writers;

set v_movie_title = trim(v_movie_title);

set v_writers = replace(v_writers,' ', '');

-- insert the movie

if not exists (select 1 from movies where title = v_movie_title) then

insert ignore into movies (title) values (v_movie_title);

end if;

select movie_id into v_movie_id from movies where title = v_movie_title;

-- split the out the writers and insert

set v_csv_done = 0;

set v_csv_idx = 1;

while not v_csv_done do

set v_name = substring(v_writers, v_csv_idx,

if(locate(',', v_writers, v_csv_idx) > 0,

locate(',', v_writers, v_csv_idx) - v_csv_idx,

length(v_writers)));

set v_name = trim(v_name);

if length(v_name) > 0 then

set v_csv_idx = v_csv_idx + length(v_name) + 1;

if not exists (select 1 from writers where name = v_name) then

insert ignore into writers (name) values (v_name);

end if;

select writer_id into v_writer_id from writers where name = v_name;

insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);

else

set v_csv_done = 1;

end if;

end while;

until v_done end repeat;

close v_cursor;

commit;

truncate table movies_unf;

end#

delimiter ;

以上是 MYSQL-将数据拆分为多行 的全部内容, 来源链接: utcz.com/qa/424189.html

回到顶部